You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

719 lines
38 KiB

  1. <?php
  2. // turn on warnings and notices during development
  3. include('initialize/PhpErrorSettings.inc.php');
  4. // Project: Web Reference Database (refbase) <http://www.refbase.net>
  5. // Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  6. // original author(s).
  7. //
  8. // This code is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY. Please see the GNU General Public
  10. // License for more details.
  11. //
  12. // File: ./import_modify.php
  13. // Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/import_modify.php $
  14. // Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  15. //
  16. // Created: 17-Feb-06, 20:57
  17. // Modified: $Date: 2012-02-28 23:23:23 +0000 (Tue, 28 Feb 2012) $
  18. // $Author: msteffens $
  19. // $Revision: 1343 $
  20. // This php script accepts input from 'import.php' and will process records exported from Endnote, Reference Manager (RIS), BibTeX, ISI Web of Science,
  21. // Pubmed, CSA or Copac. In case of a single record, the script will call 'record.php' with all provided fields pre-filled. The user can then verify
  22. // the data, add or modify any details as necessary and add the record to the database. Multiple records will be imported directly.
  23. // TODO: I18n
  24. // Incorporate some include files:
  25. include 'initialize/db.inc.php'; // 'db.inc.php' is included to hide username and password
  26. include 'includes/include.inc.php'; // include common functions
  27. include 'includes/execute.inc.php'; // include functions that deal with execution of shell commands
  28. include 'includes/import.inc.php'; // include common import functions
  29. include 'initialize/ini.inc.php'; // include common variables
  30. // --------------------------------------------------------------------
  31. // START A SESSION:
  32. // call the 'start_session()' function (from 'include.inc.php') which will also read out available session variables:
  33. start_session(true);
  34. // --------------------------------------------------------------------
  35. // Initialize preferred display language:
  36. // (note that 'locales.inc.php' has to be included *after* the call to the 'start_session()' function)
  37. include 'includes/locales.inc.php'; // include the locales
  38. // --------------------------------------------------------------------
  // Clear any errors that might have been found previously:
  $errors = array();

  // Write the (POST or GET) form variables into an array.
  // '$formVars' is initialized explicitly so that it is always a defined array (possibly empty),
  // even if the request carries no parameters at all; the array is read via 'isset()' below and
  // is handed to 'saveSessionVariable()' when validation errors occur:
  $formVars = array();

  foreach ($_REQUEST as $varname => $value)
  {
      // remove slashes from parameter values if 'magic_quotes_gpc = On':
      $formVars[$varname] = stripSlashesIfMagicQuotes($value); // function 'stripSlashesIfMagicQuotes()' is defined in 'include.inc.php'
  }
  47. // --------------------------------------------------------------------
  // Extract the ID of the client from which the query originated:
  // this identifier is used to identify queries that originated from the refbase command line clients
  // ("cli-refbase-1.0.1", "cli-refbase_import-1.0") or from a bookmarklet (e.g., "jsb-refbase-1.0.0")
  $client = isset($formVars['client']) ? $formVars['client'] : "";

  // If the data were sent via a bookmarklet, some form variables are set directly:
  if (preg_match("/^jsb/i", $client))
  {
      $bookmarkletDefaults = array(
          'formType'           => "import",
          'importRecordsRadio' => "all",
          'importRecords'      => "1",
          'showSource'         => "1"
      );

      foreach ($bookmarkletDefaults as $bookmarkletKey => $bookmarkletValue)
          $formVars[$bookmarkletKey] = $bookmarkletValue;
  }

  // Set the referring page:
  // NOTE: For 'import_modify.php' we probably want to *always* set the referrer to 'import.php' since the preference
  //       of function 'start_session()' for a referrer that was saved in a session variable may lead back to the
  //       wrong page if the user used the back button of his browser. This happens e.g. if:
  //         1. the user imports, say, ID 'arXiv:cond-mat/0703452' which gets loaded into the 'record.php' form
  //         2. the user uses his browser's back button to switch back to the 'import.php' form
  //         3. the user attempts to import 'arXiv:cond-mat/070345' (which is an incorrect arXiv ID)
  //       In that case, a referrer loaded from the session variable would redirect back to 'record.php' (instead
  //       of 'import.php'). This can be circumvented either by saving '$_SERVER['HTTP_REFERER']' to the 'referer'
  //       session variable explicitly, i.e.:
  //         $referer = $_SERVER['HTTP_REFERER'];
  //         saveSessionVariable("referer", $referer); // function 'saveSessionVariable()' is defined in 'include.inc.php'
  //         if (empty($referer) OR ($referer == "index.php") OR preg_match("/^jsb/i", $client)) ...
  //       or by simply hardcoding '$referer' to "import.php" (which is what is done here):
  $referer = "import.php"; // on error, we'll (by default) redirect to the import form
  // Access check 1: the user must be logged in.
  if (!isset($_SESSION['loginEmail']))
  {
      // ask the user to login first, then he'll get directed back to the calling page (normally, 'import.php'):
      $loginURL = "user_login.php?referer=" . rawurlencode($referer);
      header("Location: " . $loginURL);

      exit; // >>> !EXIT! <<<
  }

  // Access check 2: the (logged in) user must be allowed to import records into the database,
  // i.e. the 'user_permissions' session variable (if present) must contain 'allow_import' or 'allow_batch_import'.
  $importPermissionDenied = isset($_SESSION['user_permissions'])
      && !preg_match("/allow_import|allow_batch_import/", $_SESSION['user_permissions']);

  if ($importPermissionDenied)
  {
      // return an appropriate error message:
      $HeaderString = returnMsg($loc["NoPermission"] . $loc["NoPermission_ForImport"] . "!", "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

      // requests that did not originate from a command line client are redirected back to the main page:
      if (!preg_match("/^cli/i", $client))
      {
          header("Location: index.php");
      }

      exit; // >>> !EXIT! <<<
  }
  91. // --------------------------------------------------------------------
  // EXTRACT FORM VARIABLES:
  // Note: The '$formVars' array could be used directly below, but all variables are read out individually for
  //       readability. ('extract()' is deliberately not used since it may expose yet another security hole.)

  // The form used by the user (defaults to the main import form):
  $formType = isset($formVars['formType']) ? $formVars['formType'] : "import";

  // Main import form: the source text containing the bibliographic record(s)
  // (data from any successfully uploaded file will override data pasted into the 'sourceText' text entry field):
  $sourceText = isset($formVars['sourceText']) ? $formVars['sourceText'] : "";

  // "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL): the entered IDs:
  $sourceIDs = isset($formVars['sourceIDs']) ? $formVars['sourceIDs'] : "";

  // Bookmarklet: the URL containing the posted data:
  $sourceURL = isset($formVars['sourceURL']) ? $formVars['sourceURL'] : "";

  // Whether the original source data shall be displayed:
  $showSource = isset($formVars['showSource']) ? $formVars['showSource'] : "";

  // Which records to import:
  if (isset($_SESSION['user_permissions']) && preg_match("/allow_batch_import/", $_SESSION['user_permissions']))
  {
      // The user may batch import, so honour the submitted selection.

      // import all records ('all') or just particular ones ('only'):
      $importRecordsRadio = isset($formVars['importRecordsRadio']) ? $formVars['importRecordsRadio'] : "";

      // record numbers of those records that shall be imported; examples of recognized formats:
      //   '1-5'       imports the first five records
      //   '1 3 7'     imports records 1, 3 and 7
      //   '1-3 5-7 9' imports records 1, 2, 3, 5, 6, 7 and 9
      // (the numbers refer to the position within the source data: the first three records could be labelled e.g.
      //  as 'Record 12 of 52', 'Record 30 of 112' and 'Record 202 of 533' but must be referred to as records '1-3')
      $importRecords = isset($formVars['importRecords']) ? $formVars['importRecords'] : "";
  }
  else
  {
      // The user is only allowed to import one record at a time: always import the very first record.
      $importRecordsRadio = "only";
      $importRecords = "1";
  }

  // Whether records with unrecognized data format shall be skipped:
  $skipBadRecords = isset($formVars['skipBadRecords']) ? $formVars['skipBadRecords'] : "";
  // Check if a file was uploaded:
  // (note that to have file uploads work, HTTP file uploads must be allowed within your 'php.ini' configuration file
  //  by setting the 'file_uploads' parameter to 'On'!)
  // The upload info is an associative array with the following elements:
  //   name     - original name of file on client
  //   type     - MIME type of file
  //   tmp_name - name of temporary file on server
  //   error    - error number >0 if something went wrong, otherwise 0 (may be missing in very old PHP versions)
  //   size     - size of file in bytes
  // Depending on what happened on upload, they will contain the following values (PHP 4.1 and above):
  //              no file upload   upload exceeds 'upload_max_filesize'   successful upload
  //   name       ""               [name]                                 [name]
  //   type       ""               ""                                     [type]
  //   tmp_name   "" OR "none"     ""                                     [tmp_name]
  //   error      4                1                                      0
  //   size       0                0                                      [size]
  $uploadFile = getUploadInfo("uploadFile"); // function 'getUploadInfo()' is defined in 'include.inc.php'
  $tmpFilePath = "";

  // Validate the 'uploadFile' field:
  // TODO: Move code that validates file uploads into its own function (and merge with related code from 'modify.php')
  if (!empty($uploadFile) && !empty($uploadFile["name"])) // if the user attempted to upload a file
  {
      // 'is_uploaded_file()' returns 'true' only if the file indicated by '$uploadFile["tmp_name"]' was uploaded via
      // HTTP POST. This ensures that a malicious user hasn't tricked the script into working on files upon which it
      // should not be working - for instance, /etc/passwd.
      // An empty 'tmp_name' (e.g. when 'upload_max_filesize' was exceeded) never passes this check, so that case is
      // handled by the error code evaluation in the 'else' branch below.
      if (is_uploaded_file($uploadFile["tmp_name"]))
      {
          // prevent access to the system's 'passwd' file (already prevented by 'is_uploaded_file()' but anyhow):
          if (preg_match("/^passwd$/i", $uploadFile["name"])) // file name must not be 'passwd'
              $errors["uploadFile"] = "This file name is not allowed!";

          // check for invalid file name extensions (adjust the regex pattern if you want more relaxed file name validation):
          elseif (preg_match("/\.(exe|com|bat|zip|php|phps|php3|cgi)$/i", $uploadFile["name"]))
              $errors["uploadFile"] = "You cannot upload this type of file!"; // file name must not end with .exe, .com, .bat, .zip, .php, .phps, .php3 or .cgi

          else
              $tmpFilePath = $uploadFile["tmp_name"];
      }
      else // the upload failed: translate the upload error code into an error message
      {
          // the 'error' element may be absent, in which case we treat it like "no error reported":
          $uploadErrorCode = isset($uploadFile["error"]) ? $uploadFile["error"] : 0;

          switch ($uploadErrorCode)
          {
              case 1: // uploaded file exceeds the 'upload_max_filesize' directive in 'php.ini'
                  $maxFileSize = ini_get("upload_max_filesize");
                  $fileError = "File size must not be greater than " . $maxFileSize . ":";
                  $errors["uploadFile"] = $fileError;
                  break;

              case 2: // uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the html form (refbase doesn't currently specify MAX_FILE_SIZE but anyhow...)
                  $errors["uploadFile"] = "The file you are trying to upload is too big.";
                  break;

              case 3: // uploaded file was only partially uploaded
                  $errors["uploadFile"] = "The file you are trying to upload was only partially uploaded.";
                  break;

              case 4: // no file was uploaded
                  $errors["uploadFile"] = "You must select a file for upload.";
                  break;

              case 6: // no temporary folder available
                  $errors["uploadFile"] = "Missing a temporary folder.";
                  break;

              case 0: // no error reported although the file isn't a genuine upload; possible file attack!
              default: // any other error code
                  $errors["uploadFile"] = "There was a problem with your upload.";
                  break;
          }
      }
  }

  if (!empty($tmpFilePath)) // if there was a file uploaded successfully
  {
      // Get file contents:
      $fileData = readFromFile($tmpFilePath); // function 'readFromFile()' is defined in 'execute.inc.php'

      // Data from any successfully uploaded file will override data pasted into the 'sourceText' text entry field:
      if (!empty($fileData))
          $sourceText = $fileData;
  }
  233. // --------------------------------------------------------------------
  // PRE-PROCESS DATA INPUT:

  // Step 1: In case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1.
  // NOTE: For a latin1-based database, data pasted into the 'sourceText' text entry field will always be returned
  //       in ISO-8859-1 encoding (see the notes for 'decodeHTML()' below). However, data that were received via a
  //       file upload (or from a client such as Bookends) will have the encoding of the original file (which may
  //       be UTF-8 encoded).
  if ($contentTypeCharset == "ISO-8859-1") // '$contentTypeCharset' is defined in 'ini.inc.php'
  {
      if (detectCharacterEncoding($sourceText) == "UTF-8") // function 'detectCharacterEncoding()' is defined in 'include.inc.php'
      {
          $sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8"); // function 'convertToCharacterEncoding()' is defined in 'include.inc.php'
      }
  }

  // Step 2: Decode any HTML entities remaining in the source text.
  // NOTE: - Web browsers send back form data in the same encoding as the page containing the form. So if a user
  //         imports UTF-8 data (via the 'sourceText' text entry form) into a latin1-based database, non-latin1
  //         characters will be encoded by the browser as HTML entities (e.g., the greek delta character would be
  //         represented as '&#948;' in the source text). Therefore, function 'decodeHTML()' is used to convert any
  //         remaining HTML entities first to UTF-8, then convert Unicode entities to refbase markup (if possible),
  //         and finally transform all Unicode characters that can't be successfully converted to their ASCII
  //         equivalents.
  //       - Alternatively, it might be easier to always use UTF-8 as page encoding for 'import.php' so that UTF-8
  //         encoded data are always received, then use 'detectCharacterEncoding()' to detect the actual character
  //         encoding of the given source text, and convert to refbase markup/latin1 if needed.
  // TODO: - This conversion causes invalid XML when importing MODS XML that contains encoded angle brackets
  //         (e.g. '<title>Harbours &lt;dt.&gt;</title>'); to work around this issue, the next statement needs to
  //         be commented out; for a real fix, the decoding of characters needs to be adapted based on the
  //         '$sourceFormat' (which, ATM, is only identified further down below).
  $sourceText = decodeHTML($contentTypeCharset, $sourceText); // function 'decodeHTML()' is defined in 'include.inc.php'
  // Process record number input:
  // Builds '$importRecordNumbersArray', the list of (integer) record numbers that shall be imported, from the
  // free-form '$importRecords' string (e.g. '1-5', '1 3 7' or '1-3 5-7 9').
  $importRecordNumbersArray = array(); // initialize array variable which will hold all the record numbers that shall be imported

  if (!empty($importRecords))
  {
      // split input string on all but digits or the hyphen ("-") character:
      // (the 'PREG_SPLIT_NO_EMPTY' flag causes only non-empty pieces to be returned)
      // this keeps only elements such as '1', '3-5', '3-5-9', '3-', '-3' or '-' (the malformed ones are dealt with below)
      $importRecordsArray = preg_split("/[^0-9-]+/", $importRecords, -1, PREG_SPLIT_NO_EMPTY);

      foreach ($importRecordsArray as $importRecordsElement)
      {
          // Locate the first number within this element, optionally followed by a hyphen and a second number.
          // This way, stray hyphens never end up in the list of record numbers:
          //   '3-5-9' -> range 3-5 (anything after the first two numbers is ignored)
          //   '3-'    -> single record 3
          //   '-3'    -> single record 3
          //   '-3-5'  -> range 3-5
          //   '-'     -> ignored (contains no number at all)
          if (!preg_match("/(\d+)(?:-(\d+))?/", $importRecordsElement, $importRecordsElementArray))
              continue;

          // record numbers are stored as integers so that e.g. '03' and '3' are treated as the same record
          // when removing duplicates and when comparing against the available record numbers:
          $importRecordNumber = (int) $importRecordsElementArray[1];

          if (isset($importRecordsElementArray[2]) && ($importRecordsElementArray[2] != "")) // we're dealing with a range of record numbers (such as '1-5')
          {
              // generate an array that includes all numbers from start number to end number:
              $importRecordRangeArray = range($importRecordNumber, (int) $importRecordsElementArray[2]);

              foreach ($importRecordRangeArray as $importRecordNumber) // append all record numbers within range to array
                  $importRecordNumbersArray[] = $importRecordNumber;
          }
          else // this element contains just a single record number
          {
              $importRecordNumbersArray[] = $importRecordNumber;
          }
      }
  }
  // (validation will throw up an error if we're supposed to import only particular records but no record numbers were specified)

  // Remove any duplicate record number(s) from the list of extracted record numbers:
  $importRecordNumbersArray = array_unique($importRecordNumbersArray);
  281. // --------------------------------------------------------------------
  // IDENTIFY SOURCE FORMAT:
  // '$formType' is taken from the request, so it may hold a value other than "import" or "importID". The source
  // format is therefore initialized to an empty string (= unrecognized) so that the variable is always defined:
  $sourceFormat = "";

  // if the source text originated from the main 'import' form provided by 'import.php':
  if ($formType == "import")
      // attempt to identify the format of the input text:
      $sourceFormat = identifySourceFormat($sourceText); // function 'identifySourceFormat()' is defined in 'import.inc.php'

  // else if source text originated from the "Import IDs" form (which imports records from PubMed ID, arXiv ID, DOI or OpenURL):
  elseif ($formType == "importID")
      $sourceFormat = identifySourceID($sourceIDs); // function 'identifySourceID()' is defined in 'import.inc.php'
  290. // --------------------------------------------------------------------
  // FETCH DATA FROM URL:
  // Only relevant for imports via ID (i.e. the "Import IDs" form), and only if some IDs of a recognized kind were given.
  // TODO: Modify the code so that '$sourceIDs' can contain a mixture of any supported IDs.
  if (($formType == "importID") && !empty($sourceIDs) && !empty($sourceFormat))
  {
      // --- PubMed IDs ---
      if (preg_match("/^Pubmed (Medline|XML)$/i", $sourceFormat) && preg_match("/[0-9]/", $sourceIDs))
      {
          // the individual PubMed IDs are separated by whitespace:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // get the source data for all given PubMed IDs from PubMed.gov:
          list($errors, $sourceText) = fetchDataFromPubMed($idArray, $sourceFormat); // function 'fetchDataFromPubMed()' is defined in 'import.inc.php'
      }

      // --- arXiv IDs ---
      elseif (preg_match("/^arXiv XML$/i", $sourceFormat) && preg_match("#(arXiv:|http://arxiv\.org/abs/)?([\w.-]+/\d{7}|\d{4}\.\d{4,})(v\d+)?#i", $sourceIDs))
      {
          // strip any "arXiv:" or "http://arxiv.org/abs/" prefixes off the ID string:
          $sourceIDs = preg_replace("#(?<=^|\s)(arXiv:|http://arxiv\.org/abs/)#", "", $sourceIDs);

          // the individual arXiv IDs are separated by whitespace:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // get the source data for all given arXiv IDs from arXiv.org:
          // NOTE: In case of function 'fetchDataFromArXiv()', variable '$sourceText' contains the SimplePie object
          //       with the parsed Atom XML feed (and not just text); handling of any encoding issues is therefore
          //       done within function 'arxivToRefbase()'
          // TODO: This is inconsistent with the behaviour of the other 'fetchData*()' functions and we should do
          //       something about it!
          list($errors, $sourceText) = fetchDataFromArXiv($idArray, $sourceFormat); // function 'fetchDataFromArXiv()' is defined in 'import.inc.php'
      }

      // --- DOIs/OpenURLs ---
      // TODO: - to support OpenURL context objects from COinS or Atom XML, we need to decode ampersand characters
      //         ('&amp;' -> '&'), and allow for OpenURLs that don't start with '?' or '&'
      elseif (
          preg_match("/^CrossRef XML$/i", $sourceFormat)
          && (
              preg_match("#(?<=^|\s)(doi:|http://dx\.doi\.org/)?10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs)
              || preg_match("#(?<=^|\s)(openurl:|http://.+?(?=\?))?.*?(?<=[?&])ctx_ver=Z39\.88-2004(?=&|$).*?(?=$|\s)#i", $sourceIDs)
          )
      )
      {
          // strip any prefixes (like "doi:", "openurl:", "http://dx.doi.org/" or "http://...?") off the ID string:
          $sourceIDs = preg_replace("#(?<=^|\s)(doi:|http://dx\.doi\.org/)#", "", $sourceIDs);
          $sourceIDs = preg_replace("#(?<=^|\s)(openurl:|http://.+?(?=\?))#", "", $sourceIDs);

          // the individual DOIs/OpenURLs are separated by whitespace:
          $idArray = preg_split("/\s+/", $sourceIDs, -1, PREG_SPLIT_NO_EMPTY);

          // if the ID string contains a DOI, try to retrieve information from PubMed.gov before querying CrossRef.org:
          // (the call replaces '$idArray' with the ID array returned by 'fetchDOIsFromPubMed()')
          // TODO: Test with $sourceIDs containing a mixture of DOIs and OpenURLs, as well as with $sourceIDs
          //       containing DOIs for articles listed in PubMed AND NOT listed in PubMed!
          if (preg_match("#10\.\d{4}/\S+?(?=$|\s)#i", $sourceIDs))
          {
              list($errors, $sourceText, $idArray) = fetchDOIsFromPubMed($idArray); // function 'fetchDOIsFromPubMed()' is defined in 'import.inc.php'
          }

          if (empty($idArray))
          {
              // no IDs are left for CrossRef, so the source data are parsed with the PubMed Medline importer:
              $sourceFormat = "Pubmed Medline";
          }
          else
          {
              // get the record metadata for all given DOIs/OpenURLs from CrossRef.org:
              list($errors, $sourceText) = fetchDataFromCrossRef($idArray, $sourceFormat); // function 'fetchDataFromCrossRef()' is defined in 'import.inc.php'

              // in case of a latin1-based database, attempt to convert UTF-8 data to refbase markup & latin1:
              if ($contentTypeCharset == "ISO-8859-1")
              {
                  if (detectCharacterEncoding($sourceText) == "UTF-8")
                      $sourceText = convertToCharacterEncoding("ISO-8859-1", "TRANSLIT", $sourceText, "UTF-8");
              }
          }
      }
  }
  347. // --------------------------------------------------------------------
  // PARSE SOURCE TEXT:
  // Start out with "nothing parsed" defaults. They stay in effect if no source text or source format is available,
  // and also if no importer exists for the identified source format. Without these defaults, the latter case would
  // leave the variables undefined although the validation code below reads them (e.g. via 'count()' and 'range()').
  $importDataArray = array();
  $recordsCount = 0;
  $importRecordNumbersRecognizedFormatArray = array();
  $importRecordNumbersNotRecognizedFormatArray = array();

  if (!empty($sourceText) AND !empty($sourceFormat))
  {
      // fetch the path/name of the import format file that's associated with the import format given in '$sourceFormat':
      $importFormatFile = getFormatFile($sourceFormat, "import"); // function 'getFormatFile()' is defined in 'include.inc.php'

      if (!empty($importFormatFile))
      {
          // Get all cite keys specified by the current user and build an array of uniquified cite keys ('$citeKeysArray')
          // which is used to ensure uniqueness of generated cite keys among all imported records as well as the user's existing records:
          $userCiteKeysArray = getUserCiteKeys($loginUserID); // '$loginUserID' is provided as session variable on login; function 'getUserCiteKeys()' is defined in 'include.inc.php'

          // Get all user options for the current user (which is required by function 'generateCiteKey()'
          // that, in turn, is called below & from within the 'addRecords()' function):
          $userOptionsArray = getUserOptions($loginUserID); // function 'getUserOptions()' is defined in 'include.inc.php'

          // Include the found import format file *once*:
          include_once "import/" . $importFormatFile;

          // Parse records from the specified import format:
          // function 'importRecords()' is defined in the import format file given in '$importFormatFile' (which, in turn, must reside in the 'import' directory of the refbase root directory)
          // NOTE: for arXiv IDs, '$sourceText' holds the SimplePie object returned by 'fetchDataFromArXiv()' instead of plain text
          list($importDataArray, $recordsCount, $importRecordNumbersRecognizedFormatArray, $importRecordNumbersNotRecognizedFormatArray, $errors) = importRecords($sourceText, $importRecordsRadio, $importRecordNumbersArray);
      }
      else
          $errors["sourceText"] = "Sorry, but the $sourceFormat importer is currently not available!";
  }
  378. // --------------------------------------------------------------------
  // VALIDATE DATA FIELDS:

  // For each parsed record, function 'validateRecords()' (in 'import.inc.php') will assign errors to '$errors["sourceText"]'.
  // For the "Import IDs" form, such error messages are moved over to '$errors["sourceIDs"]':
  if (($formType == "importID") && isset($errors["sourceText"]))
  {
      $errors["sourceIDs"] = $errors["sourceText"];
      unset($errors["sourceText"]);
  }

  // Main import form: some source text must have been given:
  if (($formType == "import") && empty($sourceText))
  {
      $errors["sourceText"] = "Source data missing!";
  }
  // "Import IDs" form: some recognized IDs must have been given (unless an ID-related error was already filed):
  elseif (($formType == "importID") && !isset($errors["sourceIDs"]) && (empty($sourceIDs) || empty($sourceFormat)))
  {
      $errors["sourceIDs"] = "You must specify at least one valid ID!";
  }
  // Some source data were given but the source text format wasn't among the recognized formats:
  elseif (empty($sourceFormat))
  {
      $errors["sourceText"] = "Unrecognized data format!";
  }
  // Only particular records shall be imported: validate the 'importRecords' text entry field:
  elseif ($importRecordsRadio == "only")
  {
      if (!empty($importRecords) && preg_match("/[0-9]/", $importRecords))
      {
          // some record numbers were given, so check that these numbers are actually available in the input data:
          $availableRecordNumbersArray = range(1, $recordsCount); // construct an array of available record numbers

          // get all record numbers to import which are NOT available in the source data
          // (i.e. all elements from '$importRecordNumbersArray' that are not present in '$availableRecordNumbersArray'):
          $importRecordNumbersNotAvailableArray = array_diff($importRecordNumbersArray, $availableRecordNumbersArray);

          // just FYI, the line below would get all record numbers to import which ARE actually available in the source data:
          // $importRecordNumbersAvailableArray = array_diff($importRecordNumbersArray, $importRecordNumbersNotAvailableArray);

          if (!empty($importRecordNumbersNotAvailableArray)) // the user requested some record(s) that don't exist in the source data
          {
              if ($recordsCount == 1) // one record available
              {
                  $errors["importRecords"] = "Only one record available! You can only use record number '1'.";
              }
              else // several records available
              {
                  $errors["importRecords"] = "Only " . $recordsCount . " records available! You can only use record numbers '1-" . $recordsCount . "'.";
              }
          }
      }
      else // partial import requested but no record numbers given
      {
          $errors["importRecords"] = "Record number(s) missing!";
      }
  }

  // File the 'badRecords' (and 'skipBadRecords') error elements, which indicate whether the
  // 'Skip records with unrecognized data format' checkbox shall be displayed or not:
  // NOTE: validation of individual records is done within the import functions, which modify the '$errors' array
  //       if any records of unrecognized format are found
  if (empty($sourceText)) // no source text available at all
  {
      $errors["badRecords"] = "all";
  }
  elseif (empty($importRecordNumbersRecognizedFormatArray)) // none of the records to import had a recognized format
  {
      $errors["badRecords"] = "all";

      if (!empty($sourceFormat) && (count($importRecordNumbersNotRecognizedFormatArray) > 1)) // the user attempted to import more than one record
      {
          $errors["skipBadRecords"] = "Sorry, but all of the specified records were of unrecognized data format!";
      }
      else // the user tried to import one single record (also triggered if '$importRecords' is empty)
      {
          $errors["skipBadRecords"] = ""; // an empty 'skipBadRecords' element makes 'import.php' do the right thing
      }
  }
  elseif (!empty($importRecordNumbersNotRecognizedFormatArray)) // some records had a recognized format but some were NOT recognized
  {
      $errors["badRecords"] = "some";
      $errors["skipBadRecords"] = "Skip records with unrecognized data format";
  }
  442. // --------------------------------------------------------------------
  // Check if there were any validation errors:
  if (count($errors) > 0)
  {
      // The 'badRecords' element ("all" or "some") is only filed if records of unrecognized format were found, so it
      // may be missing while other errors exist. It is read once here via 'isset()' to avoid accessing an undefined
      // array key below:
      $badRecordsStatus = isset($errors["badRecords"]) ? $errors["badRecords"] : "";

      // we ignore errors regarding records with unrecognized format if:
      // - at least some of the specified records had a valid data format and
      // - the user did mark the 'Skip records with unrecognized data format' checkbox
      if (!(($badRecordsStatus == "some") AND ($skipBadRecords == "1")))
      {
          // ...otherwise we'll present the error message(s):
          if (preg_match("/^be/i", $client)) // if the query originated from a Bookends upload request ("be-bookends_import-1.0")
          {
              // Include errors in redirection request:
              $redirectURL = $referer . "?";

              foreach ($errors as $varname => $value)
                  $redirectURL .= "&" . $varname . "=" . rawurlencode($value);

              header("Location: " . $redirectURL);
          }
          elseif (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
          {
              echo "There were validation errors regarding the data you submitted:\n\n";

              // build the hint about bad records, which is printed after all other error messages:
              if (($badRecordsStatus == "all") && (!empty($errors["skipBadRecords"])))
                  $skipBadInfo = $errors["skipBadRecords"] . "\n\n";
              elseif ($badRecordsStatus == "some")
                  $skipBadInfo = "Use '--skipbad=1' to skip records with unrecognized data format.\n\n";
              else
                  $skipBadInfo = "";

              // the two helper elements are not printed as regular error messages:
              unset($errors["badRecords"]);
              unset($errors["skipBadRecords"]);

              foreach ($errors as $varname => $value)
              {
                  $value = preg_replace("/<br>/i", "\n ", $value); // convert HTML line breaks for plain text output
                  echo $varname . ": " . $value . "\n\n";
              }

              echo $skipBadInfo;
          }
          else // the query originated from a web browser
          {
              // Write back session variables:
              saveSessionVariable("errors", $errors); // function 'saveSessionVariable()' is defined in 'include.inc.php'
              saveSessionVariable("formVars", $formVars);

              // Redirect the browser back to the import form:
              header("Location: " . $referer);
          }

          exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      }
  }
  489. // --------------------------------------------------------------------
  // At this point the submitted data passed validation.

  // IMPORT RECORDS:
  // Two routes exist:
  //   - several records, or a request from a command line/Bookends client: the records are written to the
  //     database straight away
  //   - exactly one record from any other client: the record is handed over to 'record.php' so that the user
  //     can check and edit it before it gets added
  $importedRecordsArray = array();

  $recognizedRecordsCount = count($importRecordNumbersRecognizedFormatArray);
  $isCliOrBookendsClient = preg_match("/^(cli|be)/i", $client);

  if (($recognizedRecordsCount != 1) OR $isCliOrBookendsClient) // import record(s) directly:
  {
      // Add all records to the database (i.e., for each record, add a row entry to MySQL table 'refs');
      // afterwards, '$importedRecordsArray' holds the serial numbers of all newly imported records:
      $importedRecordsArray = addRecords($importDataArray); // function 'addRecords()' is defined in 'include.inc.php'
  }
  else // there's just a single record to import -AND- the request didn't originate from a command line or Bookends client:
  {
      // When the record data carry a 'call_number' but no 'cite_key', the call number is copied over to the
      // user-specific 'cite_key' field, so that the original call number/cite key is kept as cite key upon
      // export. Note, however, that (depending on the user's settings) function 'generateCiteKey()' below
      // may still modify or regenerate the cite key.
      $hasCallNumber = !empty($importDataArray['records'][0]['call_number']);
      $lacksCiteKey = empty($importDataArray['records'][0]['cite_key']);

      if ($hasCallNumber AND $lacksCiteKey)
      {
          $importDataArray['records'][0]['cite_key'] = $importDataArray['records'][0]['call_number'];
      }

      // Function 'generateCiteKey()' expects the key names used by the '$formVars' array, so the keys of the
      // record array get mapped to these names first (a workaround; eventually, '$formVars' should use the
      // MySQL field names as its array keys):
      $parsedRecordFormVars = buildFormVarsArray($importDataArray['records'][0]); // function 'buildFormVarsArray()' is defined in 'include.inc.php'

      // Generate or modify (e.g. uniquify) the cite key for this record:
      $generatedCiteKey = generateCiteKey($parsedRecordFormVars); // function 'generateCiteKey()' is defined in 'include.inc.php'
      $importDataArray['records'][0]['cite_key'] = $generatedCiteKey;

      // Store the record data in a session variable:
      // NOTE: Passing the data via the session (instead of the URL) allows to retain large param/value strings
      //       that would exceed the maximum string limit for GET requests. This works around a limitation in
      //       Internet Explorer which has a maximum URL length of 2,083 characters & a maximum path length of
      //       2,048 characters. More info: <http://support.microsoft.com/kb/208427/EN-US/>
      saveSessionVariable("importData", $importDataArray['records'][0]);

      // RELOCATE TO IMPORT PAGE:
      // call 'record.php' which will load its form fields with the data of the current record
      header("Location: record.php?recordAction=add&mode=import&importSource=generic");

      exit; // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> !EXIT! <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  }
  524. // --------------------------------------------------------------------
  // DISPLAY RESULTS
  // On success, the serial numbers of the new records are condensed into a 'records' query string, an optional
  // email announcement is sent, and the client is redirected to 'show.php'. If nothing was imported, command line
  // clients get a plain text message while all other clients are sent back to '$referer' with an error state.
  if (!empty($importedRecordsArray)) // if some records were successfully imported
  {
      $importedRecordsCount = count($importedRecordsArray);

      // Build string of record serial numbers (to be used with the 'records' query parameter):
      // runs of consecutive serial numbers are imploded into ranges (e.g. "150,151,152,160" becomes "150-152,160")
      $serialRanges = array();
      $rangeStart = $importedRecordsArray[0];
      $rangeEnd = $rangeStart;

      for ($i = 1; $i < $importedRecordsCount; $i++) // for the second to the last serial number...
      {
          if ($importedRecordsArray[$i] == ($rangeEnd + 1)) // this number is consecutive to the previous one
          {
              $rangeEnd = $importedRecordsArray[$i]; // extend the current range
          }
          else // this number is NOT consecutive to the previous one
          {
              // close the current range (a range of one element is written as a single number)...
              $serialRanges[] = ($rangeStart == $rangeEnd) ? $rangeStart : ($rangeStart . "-" . $rangeEnd);

              // ...and start a new one:
              $rangeStart = $importedRecordsArray[$i];
              $rangeEnd = $rangeStart;
          }
      }

      $serialRanges[] = ($rangeStart == $rangeEnd) ? $rangeStart : ($rangeStart . "-" . $rangeEnd); // close the last range

      $recordSerialsQueryString = implode(",", $serialRanges);

      // Send EMAIL announcement:
      if ($sendEmailAnnouncements == "yes")
      {
          // variables '$sendEmailAnnouncements', '$mailingListEmail', '$officialDatabaseName' and '$databaseBaseURL' are specified in 'ini.inc.php';
          // '$loginFirstName' and '$loginLastName' are provided as session variables by the 'start_session()' function in 'include.inc.php'

          // send a notification email to the mailing list email address given in '$mailingListEmail':
          $emailRecipient = "Literature Database Announcement List <" . $mailingListEmail . ">";

          if ($importedRecordsCount == 1)
          {
              $emailSubject = "New record added to the " . $officialDatabaseName;
              $emailBodyIntro = "One record has been added to the " . $officialDatabaseName . ":";
              $detailsURL = $databaseBaseURL . "show.php?record=" . $importedRecordsArray[0];
          }
          else // $importedRecordsCount > 1
          {
              $emailSubject = "New records added to the " . $officialDatabaseName;
              $emailBodyIntro = $importedRecordsCount . " records have been added to the " . $officialDatabaseName . ":";
              $detailsURL = $databaseBaseURL . "show.php?records=" . $recordSerialsQueryString;
          }

          $emailBody = $emailBodyIntro
                     . "\n\n added by: " . $loginFirstName . " " . $loginLastName
                     . "\n details: " . $detailsURL
                     . "\n";

          sendEmail($emailRecipient, $emailSubject, $emailBody); // function 'sendEmail()' is defined in 'include.inc.php'
      }

      if ($importedRecordsCount == 1)
          $headerMessage = $importedRecordsCount . " " . $loc["RecordSuccessfullyImported"] . ":";
      else // $importedRecordsCount > 1
          $headerMessage = $importedRecordsCount . " " . $loc["RecordsSuccessfullyImported"] . ":";

      // DISPLAY all newly added records:
      // NOTE: '$client' is URL-encoded like the header message, so that a client identifier containing reserved
      //       characters (such as '&' or '#') cannot alter the redirect URL; the known client identifiers
      //       (e.g. "cli-refbase-1.1") consist of unreserved characters only and are passed on unchanged.
      //       (presumably '$client' stems from the request -- verify where it gets assigned)
      header("Location: show.php?records=" . $recordSerialsQueryString . "&headerMsg=" . rawurlencode($headerMessage) . "&client=" . rawurlencode($client));
  }
  else // nothing imported
  {
      if (preg_match("/^cli/i", $client)) // if the query originated from a command line client such as the refbase CLI clients ("cli-refbase-1.1", "cli-refbase_import-1.0")
      {
          echo "No records imported!\n\n";
      }
      else
      {
          // we'll file again this additional error element here so that the 'errors' session variable isn't empty causing 'import.php' to re-load the form data that were submitted by the user
          $errors["badRecords"] = "all";

          // return an appropriate error message:
          $HeaderString = returnMsg($loc["NoRecordsImported"] . "!", "warning", "strong", "HeaderString"); // function 'returnMsg()' is defined in 'include.inc.php'

          // Write back session variables:
          saveSessionVariable("errors", $errors);
          saveSessionVariable("formVars", $formVars);

          header("Location: " . $referer); // redirect to the calling page (normally, 'import.php')
      }
  }
  598. // --------------------------------------------------------------------
  599. ?>