You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1183 lines
51 KiB

  1. #!/usr/bin/perl
  2. # Project: Web Reference Database (refbase) <http://www.refbase.net>
  3. # Copyright: Matthias Steffens <mailto:refbase@extracts.de> and the file's
  4. # original author(s).
  5. #
  6. # This code is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY. Please see the GNU General Public
  8. # License for more details.
  9. #
  10. # File: ./contrib/command_line/refbase
  11. # Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/contrib/command_line/refbase $
  12. # Author(s): Matthias Steffens <mailto:refbase@extracts.de>
  13. #
  14. # Created: 06-Jun-06, 18:00
  15. # Modified: $Date: 2008-11-13 21:08:22 +0000 (Thu, 13 Nov 2008) $
  16. # $Author: msteffens $
  17. # $Revision: 1315 $
  18. # REFBASE -- a refbase command line interface
  19. # Purpose: Perl script that allows to search a refbase online database from the command line and retrieve results in various formats
  20. # Usage: refbase [OPTIONS]
  21. # Help: For help with the syntax type 'refbase -h'
  22. # To view some usage examples type 'refbase -X'
  23. # Further information is available at <http://cli.refbase.net/>
  24. # Version: 1.2.1
  25. # Requires: - a shell with Perl execution capabilities
  26. # - the Perl CPAN modules LWP::UserAgent, HTTP::Request::Common, HTTP::Response, HTTP::Cookies and URI::URL
  27. # - access to a refbase database (refbase-0.9.0 or greater, refbase-0.9.5 required for '-A|--append' and '-B|--update' mode)
  28. # Limits: - Currently, this utility supports search & retrieve, but does not support update actions such as add, edit or delete.
  29. # - This script is currently just an interface to 'show.php', which for example does not support arbitrary sort orders.
  30. # - Specifying the record offset (using '-S|--start') as well as the number of records to be returned (using '-R|--rows') will
  31. # only work for the formats 'html', 'rtf', 'pdf', 'latex', 'latex_bbl', 'markdown', 'ascii', 'atom', 'srw_dc' and 'srw_mods',
  32. # since the other formats are designed to always export the entire result set. Note that for 'html', '--start' is adjusted to
  33. # the next lower value that is an exact multiple of '--rows' (which ensures correct behaviour of the browse links).
  34. # - The authentication mechanism is currently limited in that a given password will be transferred as parameter in the POST request
  35. # - The '-A|--append' and '-B|--update' modes currently only work with formats 'bibtex', 'mods' and 'srw_mods'
  36. # --------------------------------------------------------------------------------------------------------------
  37. $version = "1.2.1";
  38. # Configure variables:
  39. # Specify the full URLs to any refbase servers that shall be queried:
  40. # Notes: - the given hash keys will work as shortcuts, e.g. '--host=local' would query
  41. # your local refbase installation; one hash key must be named 'default',
  42. # all other keys can be freely chosen
  43. # - by default, the server labeled with key 'default' will be queried
  44. %hosts = (
  45. 'default' => 'http://beta.refbase.net/',
  46. 'local' => 'http://localhost/refs/',
  47. 'beta' => 'http://beta.refbase.net/',
  48. 'beta2' => 'http://refbase.textdriven.com/beta/',
  49. 'demo' => 'http://demo.refbase.net/',
  50. 'org' => 'http://www.refbase.org/'
  51. );
  52. # Specify the default values for all options that are not explicitly specified:
  53. %params = (
  54. # query options:
  55. 'author' => '', # -a|--author
  56. 'abstract' => '', # -b|--abstract
  57. 'cite_key' => '', # -c|--citekey => requires '--userid'
  58. 'date' => '', # -d|--date
  59. 'area' => '', # -e|--area
  60. 'thesis' => '', # -f|--thesis
  61. 'contribution_id' => '', # -i|--contribid
  62. 'abbrev_journal' => '', # -j|--journal
  63. 'keywords' => '', # -k|--keywords
  64. 'location' => '', # -l|--location
  65. 'ismarked' => '', # -m|--marked => requires '--userid'
  66. 'notes' => '', # -n|--notes
  67. 'publication' => '', # -p|--publication
  68. 'queryType' => 'and', # -q|--query => multiple options will by default be connected with 'AND'
  69. 'records' => '', # -r|--records
  70. 'selected' => '', # -s|--selected => requires '--userid'
  71. 'title' => '', # -t|--title
  72. 'userID' => '', # -u|--userid => the user ID of your account at the refbase database you're querying
  73. 'where' => '', # -w|--where
  74. 'type' => '', # -x|--type
  75. 'year' => '', # -y|--year
  76. 'serial' => '.+', # -z|--serial => the default '.+' causes all database records to be returned if only empty params are given (normally, you should leave this default as is)
  77. # output options:
  78. 'appendFile' => '', # -A|--append => file to which search results will be appended
  79. 'updateRecords' => '0', # -B|--update => update existing records in '-A, --append' file; must be '0' (=no) or '1' (=yes)
  80. 'citeStyle' => '', # -C|--style => desired citation style, given name must match an entry within the database's MySQL table 'styles' (keep empty to use the database default)
  81. 'extractFile' => '', # -E|--extract => file from which citations will be extracted
  82. 'format' => 'ascii', # -F|--format => output format must be 'html', 'rtf', 'pdf', 'latex', 'latex_bbl', 'markdown', 'ascii', 'ads', 'bibtex', 'endnote', 'isi', 'ris', 'atom', 'mods', 'oai_dc', 'odf', 'srw_dc', 'srw_mods', 'word' or '' (the empty string '' will produce the default 'ascii' output style)
  83. 'showLinks' => '1', # -L|--showlinks => hide/display links column in HTML output; must be '0', '1', or '' (the empty string '' will produce the default output style, i.e. print any links)
  84. 'citeOrder' => 'author', # -O|--order => cite order must be 'author', 'year', 'type', 'type-year', 'creation-date' or '' (the empty string '' will produce the default 'author' sort order)
  85. 'showQuery' => '0', # -Q|--showquery => hide/display SQL query in ASCII output; must be '0', '1', or '' (the empty string '' will produce the default output style, i.e. not showing the SQL query)
  86. 'showRows' => '', # -R|--rows => desired number of search results (keep empty to use the database default)
  87. 'startRecord' => '1', # -S|--start => offset of the first search result, starting with one
  88. 'viewType' => 'web', # -V|--view => view type of HTML output; must be 'Web', 'Print', 'Mobile' or '' (the empty string '' will produce the default 'Web' output style)
  89. # fixed parameters:
  90. 'submit' => 'Cite', # display type for HTML output; must be 'Display', 'Cite', 'Export', or '' (the empty string '' will produce the default 'columnar' output style); this param's value will get adopted below based on the chosen '--format'
  91. 'client' => "cli-refbase-" . $version # the client ID of this command line utility
  92. );
  93. # Specify the default login credentials for a refbase user account:
  94. %loginParams = (
  95. 'loginEmail' => '', # -U|--user -> the login email address of an existing refbase user
  96. 'loginPassword' => '' # -P|--password -> the password for the given user account
  97. );
  98. # Specify the location of the cookie jar file:
  99. # This file will be used to store & retrieve cookies
  100. $cookieJarFile = "$ENV{HOME}/.lwpcookies.txt";
  101. # For '-A|--append' mode, specify whether all records in the given file
  102. # shall be sorted by cite key after any records have been appended/updated:
  103. $sortAppendFileData = 1; # must be '1' (=yes) or '0' (=no)
  104. # For '-A|--append' mode, specify whether all appended/updated
  105. # records shall be reported as citations to STDOUT:
  106. $reportResults = 1; # must be '1' (=yes) or '0' (=no)
  107. # For '-A|--append' mode, specify whether a backup file shall be created
  108. # before anything gets updated or appended to the given file:
  109. $backupAppendFile = 1; # must be '1' (=yes) or '0' (=no)
  110. # By default, the backup file uses the given file name with a tilde character
  111. # added at the end of the file name. Adopt to your needs if necessary:
  112. $backupFileNameSuffix = "~";
  113. # For '-E|--extract' mode, specify regular expression patterns that match the
  114. # citation IDs (i.e. refbase serial numbers or cite keys from the 'cite_key'
  115. # field) in the given file:
  116. # For each recognized file name extension[*], there must be a triplet of code
  117. # (as shown below), each hash element contains an array with three elements:
  118. # 1) a regex pattern that matches the cite IDs in the file (e.g. '\\\\cite\{(.+?)\}')
  119. # (note that the regex patterns will be applied using the 'msg' mode modifiers)
  120. # 2) the number of the sub-pattern that captures the cite IDs (e.g. '1')
  121. # 3) a regex split pattern that matches the delimiter(s) used between multiple cite IDs (e.g. '[, ]+')
  122. # [*]: The hash key must match the file's file name extension. If the given file is
  123. # of unknown file type the 'default' pattern will be applied.
  124. %citeIDPatterns = (
  125. # - LaTeX, .tex file:
  126. 'tex' => [ '\\\\(?:(?:no)?cite|cite(?:n|num|online)|cite(?:al)?[tp]\*?|cite(?:author\*?|year(?:par)?|text|[tp]alias))(?:\[.*?\])*\{(.+?)\}', # 1), matches e.g.: '\cite{...}', '\nocite{...}', '\cite[...]{...}' and cite commands from the 'cite' & 'natbib' packages
  127. '1', # 2)
  128. '[, ]+' ], # 3)
  129. # - LaTeX, .aux file:
  130. 'aux' => [ '\\\\(?:citation|bibcite)\{(.+?)\}', # matches '\citation{...}' or '\bibcite{...}'
  131. '1',
  132. '[, ]+' ],
  133. # - LaTeX, .bib file:
  134. 'bib' => [ '^[\t ]*@[A-Za-z]+\{(.+?),', # matches '@Article{...,' etc
  135. '1',
  136. '[, ]+' ],
  137. # - LaTeX, .bbl file:
  138. 'bbl' => [ '\\\\bibitem(?s:\[.*?\])*\{(.+?)\}', # matches '\bibitem{...}' or '\bibitem[...]{...}'
  139. '1',
  140. '[, ]+' ],
  141. # - MODS, SRW_MODS, or Endnote XML file:
  142. # TODO: add support for OAI_DC, SRW_DC and refbase OpenSearch Atom XML (<dc.identifier>citekey:...</dc.identifier>)
  143. 'xml' => [ '(?:<mods.+?ID="|<identifier.+?type="citekey">|<(?:label|accession-num)>\s*<style.*?>\s*)(.+?)(?:</identifier>|\s*</style>\s*</(?:label|accession-num)>|")', # matches MODS IDs like '<mods ID="..."', '<mods version="..." ID="..."' or '<identifier type="citekey">...</identifier>' etc -OR- Endnote XML IDs, e.g. '<label><style face="normal" font="default" size="100%">...</style></label>' etc
  144. '1',
  145. '[, ]+' ],
  146. # - Endnote tagged text, .enw file:
  147. 'enw' => [ '^%F ([^\n\r]+)$', # matches '%F ...'
  148. '1',
  149. '[, ]+' ],
  150. # - RIS, .ris file:
  151. 'ris' => [ '^ID - ([^\n\r]+)$', # matches 'ID - ...'
  152. '1',
  153. '[, ]+' ],
  154. # - Generic, extracts IDs from lists of comma-separated refbase serial numbers (or cite keys) that are enclosed by braces:
  155. 'default' => [ '\{([^\n\r]+?)\}', # matches e.g.: '{123}', '{1,12,33}', '{Arrigo+Thomas2004}' or '{Arrigo+Thomas2004,Assur1958}'
  156. '1',
  157. '[,]+' ],
  158. );
  159. # --------------------------------------------------------------------------------
  160. # NOTE: You shouldn't need to change anything below this line
  161. # CPAN modules:
  162. use LWP::UserAgent; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/LWP/UserAgent.pm>
  163. use HTTP::Request::Common; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Request/Common.pm>
  164. use HTTP::Response; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Response.pm>
  165. use HTTP::Cookies; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Cookies.pm>
  166. use URI::URL; # more info: <http://search.cpan.org/~gaas/URI-1.35/URI/URL.pm>
  167. # standard modules:
  168. use Time::Local;
  169. # initialize variables:
  170. $host = $hosts{'default'};
  171. $optCt = 0;
  172. $format = '';
  173. @appendFileSerials = ();
  174. $updateRecords = 0;
  175. @extractFileSerials = ();
  176. @extractFileKeys = ();
  177. %months = (
  178. 'Jan' => 0,
  179. 'Feb' => 1,
  180. 'Mar' => 2,
  181. 'Apr' => 3,
  182. 'May' => 4,
  183. 'Jun' => 5,
  184. 'Jul' => 6,
  185. 'Aug' => 7,
  186. 'Sep' => 8,
  187. 'Oct' => 9,
  188. 'Nov' => 10,
  189. 'Dec' => 11
  190. );
  191. # Extract options:
  192. # TODO: use Getopt::Long
  193. # general options:
  194. if (($ARGV[0] eq '--help') or ($ARGV[0] eq '-h') or ($ARGV[0] eq '')) { &usage (0); } # if the user asked for --help/-h or didn't provide any input, call the 'usage' subroutine
  195. elsif (($ARGV[0] eq '--version') or ($ARGV[0] eq '-v')) { &version (0); } # show version information
  196. elsif (($ARGV[0] eq '--examples') or ($ARGV[0] eq '-X')) { &examples (0); } # print some usage examples
  197. else {
  198. foreach (@ARGV) {
  199. # extract query options:
  200. if ($_ =~ /^(?:-a|--author)=(.+)$/) { $params{'author'} = $1; $optCt++; }
  201. elsif ($_ =~ /^(?:-b|--abstract)=(.+)$/) { $params{'abstract'} = $1; $optCt++; }
  202. elsif ($_ =~ /^(?:-c|--citekey)=(.+)$/) { $params{'cite_key'} = $1; $optCt++; }
  203. elsif ($_ =~ /^(?:-d|--date)=(.+)$/) { $params{'date'} = $1; $optCt++; }
  204. elsif ($_ =~ /^(?:-e|--area)=(.+)$/) { $params{'area'} = $1; $optCt++; }
  205. elsif ($_ =~ /^(?:-f|--thesis)=(.+)$/) { $params{'thesis'} = $1; $optCt++; }
  206. elsif ($_ =~ /^(?:-i|--contribid)=(.+)$/) { $params{'contribution_id'} = $1; $optCt++; }
  207. elsif ($_ =~ /^(?:-j|--journal)=(.+)$/) { $params{'abbrev_journal'} = $1; $optCt++; }
  208. elsif ($_ =~ /^(?:-k|--keywords)=(.+)$/) { $params{'keywords'} = $1; $optCt++; }
  209. elsif ($_ =~ /^(?:-l|--location)=(.+)$/) { $params{'location'} = $1; $optCt++; }
  210. elsif ($_ =~ /^(?:-m|--marked)=(.+)$/) { $params{'ismarked'} = $1; $optCt++; }
  211. elsif ($_ =~ /^(?:-n|--notes)=(.+)$/) { $params{'notes'} = $1; $optCt++; }
  212. elsif ($_ =~ /^(?:-p|--publication)=(.+)$/) { $params{'publication'} = $1; $optCt++; }
  213. elsif ($_ =~ /^(?:-q|--query)=(.+)$/) { $params{'queryType'} = $1; }
  214. elsif ($_ =~ /^(?:-r|--records)=(.+)$/) { $params{'records'} = $1; $optCt++; }
  215. elsif ($_ =~ /^(?:-s|--selected)=(.+)$/) { $params{'selected'} = $1; $optCt++; }
  216. elsif ($_ =~ /^(?:-t|--title)=(.+)$/) { $params{'title'} = $1; $optCt++; }
  217. elsif ($_ =~ /^(?:-u|--userid)=(.+)$/) { $params{'userID'} = $1; $optCt++; }
  218. elsif ($_ =~ /^(?:-w|--where)=(.+)$/) { $params{'where'} = $1; $optCt++; }
  219. elsif ($_ =~ /^(?:-x|--type)=(.+)$/) { $params{'type'} = $1; $optCt++; }
  220. elsif ($_ =~ /^(?:-y|--year)=(.+)$/) { $params{'year'} = $1; $optCt++; }
  221. elsif ($_ =~ /^(?:-z|--serial)=(.+)$/) { $params{'serial'} = $1; }
  222. # extract output options:
  223. elsif ($_ =~ /^(?:-A|--append)=(.+)$/) { $params{'appendFile'} = $1; }
  224. elsif ($_ =~ /^(?:-B|--update)=(.+)$/) { $params{'updateRecords'} = $1; }
  225. elsif ($_ =~ /^(?:-C|--style)=(.+)$/) { $params{'citeStyle'} = $1; }
  226. elsif ($_ =~ /^(?:-E|--extract)=(.+)$/) { $params{'extractFile'} = $1; }
  227. elsif ($_ =~ /^(?:-F|--format)=(.+)$/) { $params{'format'} = $1; }
  228. elsif ($_ =~ /^(?:-L|--showlinks)=(.+)$/) { $params{'showLinks'} = $1; }
  229. elsif ($_ =~ /^(?:-O|--order)=(.+)$/) { $params{'citeOrder'} = $1; }
  230. elsif ($_ =~ /^(?:-Q|--showquery)=(.+)$/) { $params{'showQuery'} = $1; }
  231. elsif ($_ =~ /^(?:-R|--rows)=(.+)$/) { $params{'showRows'} = $1; }
  232. elsif ($_ =~ /^(?:-S|--start)=(.+)$/) { $params{'startRecord'} = $1; }
  233. elsif ($_ =~ /^(?:-V|--view)=(.+)$/) { $params{'viewType'} = $1; }
  234. # extract server options:
  235. elsif ($_ =~ /^(?:-H|--host)=(.+)$/) { $host = $1; }
  236. elsif ($_ =~ /^(?:-P|--password)=(.+)$/) { $loginParams{'loginPassword'} = $1; }
  237. elsif ($_ =~ /^(?:-U|--user)=(.+)$/) { $loginParams{'loginEmail'} = $1; }
  238. }
  239. }
  240. # resolve any host shortcuts:
  241. if (exists($hosts{$host})) {
  242. $host = $hosts{$host};
  243. }
  244. elsif ($host !~ /^https?:\/\//i) {
  245. $host = $hosts{'default'}; # can't resolve given host, reset back to default
  246. }
  247. # if any query option other than the 'serial' parameter was explicitly set,
  248. # remove any default '.+' value from the 'serial' parameter:
  249. # (otherwise an 'OR' query would always match everything)
  250. if (($optCt > 0) && ($params{'serial'} eq '.+')) {
  251. # if '--citekey', '--selected' or '--marked' is given, '--userid' must be specified as well; i.e.,
  252. # in case of these user-specific params, we'll only empty the 'serial' param if a user ID is present
  253. if (($params{'cite_key'} eq '') && ($params{'selected'} eq '') && ($params{'ismarked'} eq '')) {
  254. $params{'serial'} = '';
  255. }
  256. elsif ($params{'userID'} ne '') { # at least one of '--citekey', '--selected' or '--marked' was given together with a '--userid'
  257. $params{'serial'} = '';
  258. }
  259. }
  260. # assign correct URL params based on the '-F|--format' option:
  261. if (exists($params{'format'})) {
  262. $format = $params{'format'};
  263. if ($format =~ /^(rtf|pdf|latex|latex_bbl|markdown|ascii)$/i) {
  264. $params{'submit'} = "Cite";
  265. }
  266. if ($format =~ /^(html|rtf|pdf|latex|latex_bbl|markdown|ascii)$/i) {
  267. $format =~ s/^latex_bbl$/LaTeX .bbl/i;
  268. $params{'citeType'} = $format;
  269. }
  270. elsif ($format =~ /^(ads|bibtex|endnote|isi|ris|atom|mods|oai_dc|odf|srw(_dc|_mods)?|word)$/i) {
  271. $params{'submit'} = "Export";
  272. $params{'exportType'} = "file";
  273. if ($format =~ /^(ads|bibtex|endnote|isi|ris)$/i) {
  274. $params{'exportFormat'} = $format;
  275. }
  276. elsif ($format =~ /^(atom|mods|oai_dc|odf|srw(_dc|_mods)?|word)$/i) {
  277. $params{'exportFormat'} = $format . " xml";
  278. }
  279. }
  280. else {
  281. $params{'citeType'} = "ascii";
  282. }
  283. delete($params{'format'});
  284. }
  285. # '-E|--extract' functionality:
  286. if ($params{'extractFile'} ne '') {
  287. $extractFile = $params{'extractFile'};
  288. # remove 'extractFile' parameter (which we don't need to send to the refbase server):
  289. delete($params{'extractFile'});
  290. # check if the given file exists and is readable:
  291. if (!(-r $extractFile)) {
  292. print "The '-E|--extract' option requires a name/path to an existing file.\n\n";
  293. exit;
  294. }
  295. else { # the given '$extractFile' exists and is readable
  296. $fileExtension = "default"; # triggers default regex patterns for extraction of cite IDs
  297. # extract any file name extension:
  298. if ($extractFile =~ /\.([^.\n]+)$/) {
  299. if (exists($citeIDPatterns{$1})) { # if '$extractFile' has a recognized file name extension
  300. $fileExtension = $1; # use file-specific regex patterns for extraction of cite IDs
  301. }
  302. }
  303. $citeIDRegex = @{$citeIDPatterns{$fileExtension}}[0]; # get regex pattern that matches the cite IDs in '$extractFile'
  304. $citeIDNum = @{$citeIDPatterns{$fileExtension}}[1]; # get number of the sub-pattern that captures the cite IDs
  305. $citeIDSplit = @{$citeIDPatterns{$fileExtension}}[2]; # get regex split pattern that matches the delimiter(s) used between multiple cite IDs
  306. # open '$extractFile' in read mode:
  307. open(KEYS, "<", $extractFile) || die "Can't open file '" . $extractFile . "': $!\n";
  308. # read the entire file at once:
  309. undef $/;
  310. $extractFileString = <KEYS>;
  311. # close '$extractFile':
  312. close(KEYS) || die "Can't close file: $!\n";
  313. # extract all refbase serial numbers or cite keys that exist in '$extractFile':
  314. while ($extractFileString =~ /$citeIDRegex/msg) {
  315. $citeID = $$citeIDNum;
  316. if ($citeID =~ /^(?:\d+|$citeIDSplit)+$/) { # '$citeID' is assumed to be a refbase serial number (or a list of multiple serials)
  317. while ($citeID =~ /(\d+)/g) {
  318. push(@extractFileSerials, $1);
  319. }
  320. }
  321. else { # '$citeID' is assumed to be a cite key (or a list of multiple cite keys)
  322. push(@extractFileKeys, split(/$citeIDSplit/, $citeID));
  323. }
  324. }
  325. # remove any duplicate cite IDs:
  326. @extractFileSerials = &uniquify(@extractFileSerials);
  327. @extractFileKeys = &uniquify(@extractFileKeys);
  328. # add query clause to restrict search results to records existing in '$extractFile':
  329. if (@extractFileSerials || @extractFileKeys) {
  330. if ($params{'where'} ne '') { $params{'where'} .= " AND "; }
  331. $params{'where'} .= "(";
  332. if (@extractFileSerials) {
  333. $params{'where'} .= 'serial RLIKE "^(' . join('|', @extractFileSerials) . ')$"';
  334. }
  335. if (@extractFileKeys) {
  336. if (@extractFileSerials) { $params{'where'} .= " OR "; }
  337. $params{'where'} .= 'cite_key RLIKE "^(' . join('|', map {quotemeta(quotemeta($_))} @extractFileKeys) . ')$"'; # quotes each key before joining them
  338. }
  339. $params{'where'} .= ")";
  340. }
  341. else { # no citation IDs could be extracted from '$extractFile'
  342. print "No citation IDs were found in file '" . $extractFile . "'!\n\n";
  343. exit;
  344. }
  345. }
  346. }
  347. # '-A|--append' mode:
  348. if ($params{'appendFile'} ne '') {
  349. # '-A|--append' mode currently only works with formats 'bibtex', 'mods' and 'srw_mods',
  350. # for all other formats (or if no format was specified), we'll return an error message:
  351. if ($format !~ /^(bibtex|mods|srw(_mods)?)$/i) {
  352. print "The '-A, --append' option requires '-F, --format' set to 'bibtex', 'mods' or 'srw_mods'.\n\n";
  353. exit;
  354. }
  355. $appendFile = $params{'appendFile'};
  356. # remove 'appendFile' parameter (which we don't need to send to the refbase server):
  357. delete($params{'appendFile'});
  358. $appendFileString = '';
  359. # if the given '$appendFile' exists and is readable:
  360. if (-r $appendFile) {
  361. # open '$appendFile' in read mode:
  362. open(FILEIN, "<", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
  363. # read the entire file at once:
  364. undef $/;
  365. $appendFileString = <FILEIN>;
  366. # extract all refbase serial numbers (which already exist in '$appendFile') into an array:
  367. while ($appendFileString =~ /(?<=show\.php\?record=)(\d+)/g) {
  368. push(@appendFileSerials, $1);
  369. }
  370. # close '$appendFile':
  371. close(FILEIN) || die "Can't close file: $!\n";
  372. # backup existing contents of '$appendFile':
  373. # (note that, currently, this will overwrite contents of any existing backup file
  374. # with the same backup file name, even if nothing gets updated/appended further down)
  375. if ($backupAppendFile) {
  376. # create suitable backup file name:
  377. if ($appendFile =~ /\.([^.\n]+)$/) { # if the file has a file name extension
  378. ($appendFileBackup = $appendFile) =~ s/\.([^.\n]+)$/$backupFileNameSuffix.$1/; # add backup suffix in front of the file name extension
  379. }
  380. else {
  381. $appendFileBackup = $appendFile . $backupFileNameSuffix; # add backup suffix at end of file
  382. }
  383. # open backup file in write mode (creates new file if it doesn't exist):
  384. open(FILEBACKUP, ">", $appendFileBackup) || die "Can't open file '" . $appendFileBackup . "': $!\n";
  385. # write existing contents of '$appendFile' to backup file (replacing any previous contents):
  386. print FILEBACKUP $appendFileString;
  387. # close backup file:
  388. close(FILEBACKUP) || die "Can't close file: $!\n";
  389. }
  390. # add query clause to exclude existing records from search results:
  391. if (@appendFileSerials) {
  392. # save current contents of the '-w|--where' option before messing with it further:
  393. # (it will be needed in the 'append' and 'update' subroutines below)
  394. $where = $params{'where'};
  395. if ($params{'where'} ne '') { $params{'where'} .= " AND "; }
  396. $params{'where'} .= 'serial NOT RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
  397. }
  398. }
  399. # if no '$appendFile' exists, it will be created by the 'append' subroutine below
  400. }
  401. # '-B|--update' functionality:
  402. if ($params{'updateRecords'} == 1) {
  403. $updateRecords = 1;
  404. }
  405. # remove 'updateRecords' parameter (which we don't need to send to the refbase server):
  406. delete($params{'updateRecords'});
  407. # for HTML output, we'll adjust the display type if the '-r|--records' option contains a single record serial number:
  408. if (($params{'citeType'} eq 'html') && ($params{'records'} =~ /^[0-9]+$/)) {
  409. $params{'submit'} = "Display";
  410. }
  411. # NOTE: I tried to put all query-related code into a dedicated function but for
  412. # some reason that didn't work with redirects. ?:-/
  413. # initialize new user agent:
  414. # (uses LWP::UserAgent)
  415. $userAgent = LWP::UserAgent->new;
  416. # set user agent string:
  417. $userAgent->agent("refbase/" . $version . " (http://cli.refbase.net/) ");
  418. # allow redirection for 'POST' requests:
  419. # (by default, the list of request names that '$userAgent->redirect_ok(...)'
  420. # will allow redirection for is only set to ['GET', 'HEAD'], as per RFC 2616)
  421. push @{ $userAgent->requests_redirectable }, 'POST';
  422. # set cookie jar object:
  423. # LWP will collect cookies and respond to cookie requests via its cookie jar, thus
  424. # enabling the user agent to fetch a PHP session ID from the refbase login response
  425. # and automatically resend it upon next request
  426. $userAgent->cookie_jar({ file => $cookieJarFile, autosave => 1 });
  427. # attempt to authenticate using the given login credentials:
  428. if (($loginParams{'loginEmail'} ne '') && ($loginParams{'loginPassword'} ne '')) {
  429. $loginSuccessful = &login(0); # call the 'login' subroutine
  430. }
  431. # construct URL:
  432. # (uses URI::URL)
  433. $script = "show.php";
  434. $url = url($host . $script);
  435. # build and send GET/POST request:
  436. # (uses HTTP::Request::Common & HTTP::Response)
  437. # build POST request using the 'application/x-www-form-urlencoded' content type:
  438. $request = POST $url, \%params;
  439. # or, build POST request using the 'multipart/form-data' content type:
  440. # $request = POST $url, Content_Type => 'form-data', Content => \%params;
  441. # alternatively, build GET request:
  442. # (note that for large URLs, a GET request may cause an "414 Request-URI Too Large" error)
  443. # $url->query_form(%params);
  444. # $request = GET $url;
  445. # print $request->as_string(); # DEBUG (dumps the GET/POST request)
  446. # send the GET/POST request:
  447. $response = $userAgent->request($request); # or use: $response = $userAgent->get($url);
  448. # print returned results:
  449. if ($response->is_error()) { # if the request fails, print error message to STDERR:
  450. print STDERR $response->status_line, "\n";
  451. }
  452. elsif (defined($appendFile)) { # if an '$appendFile' was given print results to FILE:
  453. if ($updateRecords == 1 && @appendFileSerials) {
  454. &update(0); # update existing records in '$appendFile' with found results
  455. }
  456. $resultsString = $response->content();
  457. # TODO: it would be better to use function 'splitRecs' to check whether there
  458. # were any results returned for the given format
  459. if ($resultsString =~ /^Nothing found!\n+/i) {
  460. $resultsString = '';
  461. }
  462. if (($resultsString eq '') || (($format =~ /^(mods|srw(_mods)?)$/i) && ($resultsString !~ /<mods[ >]/i))) {
  463. print "There are no records that need to be added to file '" . $appendFile . "'.\n\n";
  464. }
  465. else { # if the query returned results which aren't yet in '$appendFile':
  466. &append(0); # append search results to file
  467. }
  468. }
  469. else { # print results to STDOUT:
  470. binmode STDOUT;
  471. print $response->content();
  472. }
  473. # --------------------------------------------------------------------------------
  474. # Login with login credentials given in '%loginParams':
  475. sub login
  476. {
  477. local ($status) = @_;
  478. # construct URL:
  479. # (uses URI::URL)
  480. $loginScript = "user_login.php";
  481. $loginURL = url($host . $loginScript);
  482. # send POST request:
  483. # (uses HTTP::Request::Common & HTTP::Response)
  484. $loginRequest = POST $loginURL, \%loginParams;
  485. $loginResponse = $userAgent->request($loginRequest);
  486. if ($loginResponse->is_error()) {
  487. print STDERR $loginResponse->status_line, "\n";
  488. exit $status;
  489. }
  490. else {
  491. $location = $loginResponse->header('Location');
  492. # upon successful login, refbase will redirect to 'index.php'
  493. if ($location =~ /index.php/) {
  494. return 1; # login successful
  495. }
  496. else {
  497. return 0; # login NOT successful
  498. }
  499. }
  500. }
  501. # --------------------------------------------------------------------------------
  502. # Append search results to file:
  503. sub append
  504. {
  505. local ($status) = @_;
  506. local @newAppendFileData = ();
  507. $whereAppend = "";
  508. # TODO: In order to enable sorting of all records for MODS + SRW, we should remove the
  509. # XML file header & footer from the results (and store them in a variable). Then
  510. # merge existing & new records, split, sort, join again, and finally put the XML
  511. # header & footer back.
  512. if ($format =~ /^(mods|srw(_mods)?)$/i) {
  513. # if the '$appendFile' contains MODS XML data:
  514. if (($format =~ /^mods$/i) && ($appendFileString =~ /<modsCollection/i)) {
  515. # remove XML file header and '<modsCollection>' opening tag from results:
  516. $resultsString =~ s/^<\?xml.+?<modsCollection[^>\n]*?>\n//ims;
  517. # remove '</modsCollection>' closing tag from existing records in '$appendFile':
  518. $appendFileString =~ s/^<\/modsCollection>//ims;
  519. }
  520. # if the '$appendFile' contains SRW_MODS XML data:
  521. elsif (($format =~ /^srw(_mods)?$/i) && ($appendFileString =~ /<srw:records/i)) {
  522. # remove XML file header and opening tags from results:
  523. $resultsString =~ s/^<\?xml.+?<srw:records[^>\n]*?>\n//ims;
  524. # remove closing tags from existing records in '$appendFile':
  525. $appendFileString =~ s/^\s*<\/srw:records>.*?<\/srw:searchRetrieveResponse>//ims;
  526. # TODO: update values in '<srw:numberOfRecords>' and '<srw:recordPosition>'
  527. }
  528. }
  529. # append search results to the '$appendFile' contents:
  530. $newAppendFileString = $appendFileString;
  531. if (!($sortAppendFileData) && ($format =~ /^bibtex$/i) && ($appendFileString ne '') && ($resultsString ne '')) {
  532. $newAppendFileString .= "\n";
  533. }
  534. $newAppendFileString .= $resultsString;
  535. # sort all records by cite key:
  536. if ($sortAppendFileData) {
  537. # remove any newlines from end of string:
  538. $newAppendFileString =~ s/[\n\r]+$//;
  539. # split '$appendFile' contents & search results on (format-specific) record delimiters:
  540. @newAppendFileData = &splitRecs($newAppendFileString);
  541. # sort array of records by cite key:
  542. @newAppendFileData = &sortRecs(@newAppendFileData);
  543. # merge again records into a string:
  544. if ($format =~ /^bibtex$/i) {
  545. $recDelim = "\n\n";
  546. }
  547. else {
  548. $recDelim = "\n";
  549. }
  550. $newAppendFileString = join($recDelim, @newAppendFileData) . "\n";
  551. }
  552. # open '$appendFile' in write mode (creates new file if it doesn't exist):
  553. open(FILEADD, ">", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
  554. # write existing contents & new results back to '$appendFile':
  555. print FILEADD $newAppendFileString;
  556. # close '$appendFile':
  557. close(FILEADD) || die "Can't close file: $!\n";
  558. # to give some feedback, we output all appended records as citations to STDOUT:
  559. # TODO: move into a dedicated function if possible (compare with 'update' subroutine)
  560. if ($reportResults) {
  561. $params{'submit'} = "Cite";
  562. $params{'citeType'} = "ascii";
  563. $params{'showRows'} = "99999";
  564. $params{'headerMsg'} = "Added records:";
  565. if ($where ne '') { $whereAppend = $where . " AND "; }
  566. if (($appendFileString ne '') && (@appendFileSerials)) { # '$appendFile' contains some records
  567. # add query clause to exclude existing records from search results:
  568. $whereAppend .= 'serial NOT RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
  569. }
  570. else { # '$appendFile' didn't exist or was empty
  571. # extract all refbase serial numbers from '$resultsString' into an array:
  572. while ($resultsString =~ /(?<=show\.php\?record=)(\d+)/g) {
  573. push(@appendFileSerials, $1);
  574. }
  575. # add query clause to display all records from the current search results:
  576. $whereAppend .= 'serial RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
  577. }
  578. $params{'where'} = $whereAppend;
  579. # construct URL:
  580. # (uses URI::URL)
  581. $appendFeedbackURL = url($host . $script);
  582. # send POST request:
  583. # (uses HTTP::Request::Common & HTTP::Response)
  584. $appendFeedbackRequest = POST $appendFeedbackURL, \%params;
  585. $appendFeedbackResponse = $userAgent->request($appendFeedbackRequest);
  586. print $appendFeedbackResponse->content();
  587. }
  588. }
  589. # --------------------------------------------------------------------------------
  590. # Update existing records (given in '$appendFile') with found results:
  591. sub update
  592. {
  593. local ($status) = @_;
  594. local @appendFileData = ();
  595. local @newAppendFileData = ();
  596. @updatedSerials = ();
  597. $whereUpdate = "";
  598. # add query clause to restrict search results to records existing in '$appendFile':
  599. if ($where ne '') { $whereUpdate = $where . " AND "; }
  600. $whereUpdate .= 'serial RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
  601. $params{'where'} = $whereUpdate;
  602. # fetch all records that match the given query AND which exist in '$appendFile':
  603. # construct URL:
  604. # (uses URI::URL)
  605. $updateURL = url($host . $script);
  606. # send POST request:
  607. # (uses HTTP::Request::Common & HTTP::Response)
  608. $updateRequest = POST $updateURL, \%params;
  609. $updateResponse = $userAgent->request($updateRequest);
  610. $updateResultsString = $updateResponse->content();
  611. # if the '$appendFile' contains MODS XML data:
  612. if (($format =~ /^mods$/i) && ($appendFileString =~ /<modsCollection/i)) {
  613. # remove XML file header and '<modsCollection>' opening tag from results:
  614. $updateResultsString =~ s/^<\?xml.+?<modsCollection[^>\n]*?>\n//ims;
  615. # remove '</modsCollection>' closing tag from results:
  616. $updateResultsString =~ s/^<\/modsCollection>//ims;
  617. }
  618. # if the '$appendFile' contains SRW_MODS XML data:
  619. elsif (($format =~ /^srw(_mods)?$/i) && ($appendFileString =~ /<srw:records/i)) {
  620. # remove XML file header and opening tags from results:
  621. $updateResultsString =~ s/^<\?xml.+?<srw:records[^>\n]*?>\n//ims;
  622. # remove closing tags from results:
  623. $updateResultsString =~ s/^\s*<\/srw:records>.*?<\/srw:searchRetrieveResponse>//ims;
  624. # TODO: update values in '<srw:numberOfRecords>' and '<srw:recordPosition>'
  625. }
  626. # split search results on (format-specific) record delimiters:
  627. @updateResultsData = &splitRecs($updateResultsString);
  628. # extract refbase serial number & UNIX time stamp from returned records:
  629. %updateResultsRecords = &parseRecs(@updateResultsData);
  630. # split '$appendFile' contents on (format-specific) record delimiters:
  631. @appendFileData = &splitRecs($appendFileString);
  632. # replace existing records with new ones:
  633. foreach $record (@appendFileData) { # for each of the records existing in '$appendFile'
  634. # NOTE: instead of looping over all existing records, it would probably be more effective
  635. # to loop over each of the newly fetched (updated) records instead, and directly replace
  636. # all records in '$appendFileString' that have a matching serial number, but I couldn't
  637. # get this working correctly...
  638. # if this record contains a refbase serial number & UNIX time stamp:
  639. if ($record =~ /(?<=show\.php\?record=)(\d+)[^\d\n]+(\d{2}) ([[:alpha:]]{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2})/i) {
  640. $serial = $1;
  641. if (exists($updateResultsRecords{$serial})) { # if one of the newly fetched (updated) records has an identical serial number
  642. # extract time stamp from existing record:
  643. $monthday = $2;
  644. $month = $months{$3}; # 0 == January
  645. $year = $4 - 1900; # by default, Perl counts years from 1900
  646. $hour = $5;
  647. $min = $6;
  648. $sec = $7;
  649. # convert existing record's time stamp to epoch seconds:
  650. $modifiedDateExisting = timelocal($sec, $min, $hour, $monthday, $month, $year); # uses Time::Local
  651. # extract source data & time stamp from the matching updated record:
  652. $updatedRecord = @{$updateResultsRecords{$serial}}[0];
  653. $modifiedDateUpdated = @{$updateResultsRecords{$serial}}[1];
  654. if ($modifiedDateUpdated > $modifiedDateExisting) { # if the updated record has a more recent time stamp
  655. # replace existing record with updated record:
  656. # (variables '$recStart' and '$recEnd' get defined in subroutine 'splitRecs')
  657. $record =~ s/^$recStart.+?exported from refbase.+?show\.php\?record=$serial.+?$recEnd$/$updatedRecord/ims;
  658. push(@updatedSerials, $serial);
  659. }
  660. }
  661. }
  662. push(@newAppendFileData, $record);
  663. }
  664. if (! @updatedSerials) {
  665. print "There are no records that need to be updated in file '" . $appendFile . "'.\n\n";
  666. }
  667. else {
  668. # update existing records in variable '$appendFileString':
  669. if ($format =~ /^bibtex$/i) {
  670. $recDelim = "\n\n";
  671. }
  672. else {
  673. $recDelim = "\n";
  674. }
  675. $appendFileString = join($recDelim, @newAppendFileData);
  676. # if the initial query did not return results which aren't yet in '$appendFile':
  677. # (i.e. if the 'append' subroutine won't get triggered)
  678. if (($resultsString eq '') || (($format =~ /^(mods|srw(_mods)?)$/i) && ($resultsString !~ /<mods[ >]/i))) {
  679. # open '$appendFile' in write mode:
  680. open(FILEOUT, ">", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
  681. # write back updated file:
  682. print FILEOUT $appendFileString;
  683. # close '$appendFile':
  684. close(FILEOUT) || die "Can't close file: $!\n";
  685. }
  686. # otherwise, the 'append' subroutine will write contents of '$appendFileString' back to '$appendFile'
  687. # to give some feedback, we output all updated records as citations to STDOUT:
  688. # TODO: move into a dedicated function if possible (compare with 'append' subroutine)
  689. if ($reportResults) {
  690. $params{'submit'} = "Cite";
  691. $params{'citeType'} = "ascii";
  692. $params{'showRows'} = "99999";
  693. $params{'headerMsg'} = "Updated records:";
  694. $whereUpdate = "";
  695. # add query clause to restrict search results to updated records:
  696. if ($where ne '') { $whereUpdate = $where . " AND "; }
  697. $whereUpdate .= 'serial RLIKE "^(' . join('|', @updatedSerials) . ')$"';
  698. $params{'where'} = $whereUpdate;
  699. # construct URL:
  700. # (uses URI::URL)
  701. $updateFeedbackURL = url($host . $script);
  702. # send POST request:
  703. # (uses HTTP::Request::Common & HTTP::Response)
  704. $updateFeedbackRequest = POST $updateFeedbackURL, \%params;
  705. $updateFeedbackResponse = $userAgent->request($updateFeedbackRequest);
  706. print $updateFeedbackResponse->content();
  707. }
  708. }
  709. }
  710. # --------------------------------------------------------------------------------
  711. # Split '$sourceText' on format-specific record delimiters:
  712. # (note that, currently, only formats 'bibtex', 'mods' and 'srw_mods' are supported)
  713. sub splitRecs
  714. {
  715. local ($sourceText) = @_;
  716. local (@records) = ();
  717. if ($format =~ /^(bibtex|mods|srw(_mods)?)$/i) {
  718. # define format-specific strings that open/close a record:
  719. # (note that '...' must be used here instead of "...", see Friedl regex book, 1st ed., p.300+)
  720. if ($format =~ /^bibtex$/i) {
  721. $recStart = '@';
  722. $recEnd = '\}';
  723. }
  724. elsif ($format =~ /^mods$/i) {
  725. $recStart = '\s*<mods[ >]';
  726. $recEnd = 'mods>';
  727. }
  728. elsif ($format =~ /^srw(_mods)?$/i) {
  729. $recStart = '\s*<srw:record[ >]';
  730. $recEnd = 'srw:record>';
  731. }
  732. @records = split(/(?<=$recEnd)\s*\n(?=$recStart)/m, $sourceText);
  733. }
  734. return @records;
  735. }
  736. # --------------------------------------------------------------------------------
  737. # Extract records containing a refbase serial number & UNIX time stamp from '@records':
  738. # Returns a hash of records where each hash element
  739. # - is keyed by the record's serial number
  740. # - contains a reference to an unnamed array which holds two array elements:
  741. # - the record's source data
  742. # - the modified date/time stamp converted to epoch seconds
  743. sub parseRecs
  744. {
  745. local (@records) = @_;
  746. %refbaseRecords = ();
  747. foreach $record (@records) {
  748. # extract refbase serial number & UNIX time stamp from this record:
  749. if ($record =~ /(?<=show\.php\?record=)(\d+)[^\d\n]+(\d{2}) ([[:alpha:]]{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2})/i) {
  750. $serial = $1;
  751. $monthday = $2;
  752. $month = $months{$3}; # 0 == January
  753. $year = $4 - 1900; # by default, Perl counts years from 1900
  754. $hour = $5;
  755. $min = $6;
  756. $sec = $7;
  757. # add modified date to array (after converting to epoch seconds):
  758. $modifiedDate = timelocal($sec, $min, $hour, $monthday, $month, $year); # uses Time::Local
  759. $record =~ s/\s+$//; # remove any trailing whitespace
  760. # note that if '@records' contains several records with the same refbase
  761. # serial number, only the last one will be included in '%refbaseRecords'
  762. # (and thus only the last one will get updated by the 'update' subroutine)
  763. push(@{$refbaseRecords{$serial}}, $record);
  764. push(@{$refbaseRecords{$serial}}, $modifiedDate);
  765. # NOTE: by storing a reference to an array we can add multiple values per hash key (see Perl Cookbook 5.7)
  766. }
  767. }
  768. return %refbaseRecords;
  769. }
  770. # --------------------------------------------------------------------------------
  771. # Sort array of records by cite key:
  772. # (note that, currently, only formats 'bibtex', 'endnote', 'ris' 'mods' and 'srw_mods' are supported)
  773. sub sortRecs
  774. {
  775. local (@records) = @_;
  776. %keyedRecordData = ();
  777. @sortedRecordData = ();
  778. $i = 1;
  779. $fileExtension = "default"; # triggers default regex patterns for extraction of cite IDs
  780. # use file-specific regex patterns for extraction of cite IDs:
  781. if ($format =~ /^(bibtex|endnote|ris|mods|srw(_mods)?)$/i) {
  782. if ($format =~ /^bibtex$/i) {
  783. $fileExtension = "bib";
  784. }
  785. elsif ($format =~ /^endnote$/i) {
  786. $fileExtension = "enw";
  787. }
  788. elsif ($format =~ /^ris$/i) {
  789. $fileExtension = "ris";
  790. }
  791. elsif ($format =~ /^(mods|srw(_mods)?)$/i) {
  792. $fileExtension = "xml";
  793. }
  794. }
  795. $citeIDRegex = @{$citeIDPatterns{$fileExtension}}[0]; # get regex pattern that matches the cite IDs in the records
  796. $citeIDNum = @{$citeIDPatterns{$fileExtension}}[1]; # get number of the sub-pattern that captures the cite IDs
  797. foreach $record (@records) {
  798. # extract cite IDs and use them as hash keys:
  799. # (we always append an incrementing number to ensure unique keys; the number
  800. # is padded with leading zeros in order to allow for correct string sorting)
  801. if ($record =~ /$citeIDRegex/msg) {
  802. $citeID = $$citeIDNum . sprintf("-%06d", $i++);
  803. }
  804. else { # no cite ID found, so we just use an incrementing number as hash key
  805. $citeID = sprintf("%06d", $i++);
  806. }
  807. $keyedRecordData{$citeID} = $record;
  808. }
  809. @sortedKeys = sort keys %keyedRecordData;
  810. foreach $key (@sortedKeys) {
  811. push(@sortedRecordData, $keyedRecordData{$key});
  812. }
  813. return @sortedRecordData;
  814. }
  815. # --------------------------------------------------------------------------------
  816. # Remove any duplicate items from '@array':
  817. sub uniquify
  818. {
  819. local (@array) = @_;
  820. local (%unique) = ();
  821. foreach $item (@array) {
  822. $unique{$item}++;
  823. }
  824. return (sort keys %unique);
  825. }
  826. # --------------------------------------------------------------------------------
  827. # Print usage and exit:
  828. sub usage
  829. {
  830. local ($status) = @_;
  831. print "\nrefbase command line client, v" . $version . " by Matthias Steffens, http://cli.refbase.net/\n\n"
  832. . "Usage: refbase [OPTIONS]\n\n"
  833. . "Notes: - At least one query option must be given and unrecognized options will be ignored.\n"
  834. . " - If multiple options are given, they will by default be connected with 'AND'. Use\n"
  835. . " '--query=or' to connect multiple options with 'OR'.\n"
  836. . " - Options syntax: [OPTION]=[VALUE], e.g. '-a=steffens' or '--author=\"steffens, m\"'.\n"
  837. . " - Returns up to '--rows' number of records beginning with '--start'. If all given\n"
  838. . " query options are empty, all database records will be returned.\n"
  839. . " - Note that '--records' assumes a list of full record serials separated by non-digit\n"
  840. . " characters while '--serial' allows for partial matches.\n"
  841. . " - For each option, default values can be specified at the top of the script.\n"
  842. . " Current defaults are given in parentheses.\n\n"
  843. . "General Options: -h, --help - display this help text\n"
  844. . " -v, --version - display version information\n"
  845. . " -X, --examples - display usage examples\n\n"
  846. . "Query Options: -a, --author - search author field ('" . $params{'author'} . "')\n"
  847. . " -b, --abstract - search abstract field ('" . $params{'abstract'} . "')\n"
  848. . " -c, --citekey - search cite_key field, requires '-u, --userid' ('" . $params{'cite_key'} . "')\n"
  849. . " -d, --date - search by creation date ('" . $params{'date'} . "')\n"
  850. . " -e, --area - search area field ('" . $params{'area'} . "')\n"
  851. . " -f, --thesis - search thesis field ('" . $params{'thesis'} . "')\n"
  852. . " -i, --contribid - search contribution_id field ('" . $params{'contribution_id'} . "')\n"
  853. . " -j, --journal - search abbrev_journal field ('" . $params{'abbrev_journal'} . "')\n"
  854. . " -k, --keywords - search keywords field ('" . $params{'keywords'} . "')\n"
  855. . " -l, --location - search location field ('" . $params{'location'} . "')\n"
  856. . " -m, --marked - search marked field, requires '-u, --userid' ('" . $params{'ismarked'} . "')\n"
  857. . " -n, --notes - search notes field ('" . $params{'notes'} . "')\n"
  858. . " -p, --publication - search publication field ('" . $params{'publication'} . "')\n"
  859. . " -q, --query - query type, possible values: and, or ('" . $params{'queryType'} . "')\n"
  860. . " -r, --records - search serial field ('" . $params{'records'} . "')\n"
  861. . " -s, --selected - search selected field, requires '-u, --userid' ('" . $params{'selected'} . "')\n"
  862. . " -t, --title - search title field ('" . $params{'title'} . "')\n"
  863. . " -u, --userid - join with user-specific data from user ID ('" . $params{'userID'} . "')\n"
  864. . " -w, --where - search by using a raw sql where clause ('" . $params{'where'} . "')\n"
  865. . " -x, --type - search type field ('" . $params{'type'} . "')\n"
  866. . " -y, --year - search year field ('" . $params{'year'} . "')\n"
  867. . " -z, --serial - search serial field (partial matches) ('" . $params{'serial'} . "')\n\n"
  868. . "Output Options: -A, --append - file to which returned records are appended ('" . $params{'appendFile'} . "')\n"
  869. . " requires '-F, --format': bibtex, mods, srw_mods\n"
  870. . " -B, --update - update existing records in '-A, --append' file ('" . $params{'updateRecords'} . "')\n"
  871. . " possible values: 0, 1\n"
  872. . " -C, --style - citation style ('" . $params{'citeStyle'} . "')\n"
  873. . " -E, --extract - file from which citation IDs are extracted ('" . $params{'extractFile'} . "')\n"
  874. . " supported file types: " . join(', ', sort keys(%citeIDPatterns)) . "\n"
  875. . " -F, --format - output format ('" . $params{'format'} . "')\n"
  876. . " possible values: html, rtf, pdf, latex, latex_bbl, markdown, ascii,\n"
  877. . " ads, bibtex, endnote, isi, ris, atom, mods, oai_dc,\n"
  878. . " odf, srw_dc, srw_mods, word\n"
  879. . " -L, --showlinks - hide/display links column in html output ('" . $params{'showLinks'} . "')\n"
  880. . " possible values: 0, 1\n"
  881. . " -O, --order - sort order of returned records ('" . $params{'citeOrder'} . "')\n"
  882. . " possible values: author, year, type, type-year, creation-date\n"
  883. . " -Q, --showquery - hide/display SQL query in ASCII output ('" . $params{'showQuery'} . "')\n"
  884. . " possible values: 0, 1\n"
  885. . " -R, --rows - number of records to be returned ('" . $params{'showRows'} . "')\n"
  886. . " -S, --start - number of first record to be returned ('" . $params{'startRecord'} . "')\n"
  887. . " -V, --view - view type of html output ('" . $params{'viewType'} . "')\n"
  888. . " possible values: web, print, mobile\n\n"
  889. . "Server Options: -H, --host - URL of the refbase database ('" . $host . "')\n"
  890. . " defined shortcuts: " . join(', ', sort keys(%hosts)) . "\n"
  891. . " -P, --password - password for given '-U, --user' account";
  892. if ($loginParams{'loginPassword'} ne '') {
  893. print "\n (a default pwd has been defined)\n";
  894. }
  895. else {
  896. print " ('')\n";
  897. }
  898. print " -U, --user - login email address of an existing refbase user\n"
  899. . " ('" . $loginParams{'loginEmail'} . "')\n\n";
  900. exit $status;
  901. }
  902. # --------------------------------------------------------------------------------
  903. # Print version number and exit:
  904. sub version
  905. {
  906. local ($status) = @_;
  907. print "\nrefbase command line client, version " . $version
  908. . "\ncheck for updates at http://cli.refbase.net/\n\n";
  909. exit $status;
  910. }
  911. # --------------------------------------------------------------------------------
  912. # Print examples and exit:
  913. sub examples
  914. {
  915. local ($status) = @_;
  916. print <<'END_EXAMPLES';
  917. --------------------------------------------------------------------------------
  918. REFBASE USAGE EXAMPLES:
  919. --------------------------------------------------------------------------------
  920. 1) Find all records where the author field contains 'mock' AND the year field
  921. contains '2005':
  922. refbase -a=mock -y=2005
  923. --------------------------------------------------------------------------------
  924. 2) Find all records where the author field contains 'mock' OR the title field
  925. contains 'photo', and display 10 records starting with the 21st record in the
  926. result set:
  927. refbase -a=mock -t=photo -q=or -R=10 -S=21
  928. --------------------------------------------------------------------------------
  929. 3) Export records with serial numbers '1', '12' and '34' to Endnote format and
  930. save them to a file named 'export.enw':
  931. refbase -r=1,12,34 -F=endnote > export.enw
  932. --------------------------------------------------------------------------------
  933. 4) Return up to 50 records that were selected by a user with a user ID '2' in
  934. RTF format using citation style "Ann Glaciol" and sorting them first by
  935. record type, then by year, and save results to a file named 'citations.rtf':
  936. refbase -s=yes -u=2 -R=50 -F=rtf -C="Ann Glaciol" -O=type-year > citations.rtf
  937. --------------------------------------------------------------------------------
  938. 5) Find all records which were modified today by a user named "admin" and where
  939. the location field contains 'msteffens' (note the use of the '-w' option to
  940. specify a custom WHERE clause):
  941. refbase -w='modified_date = CURDATE() AND modified_by RLIKE "admin"' -l=msteffens
  942. --------------------------------------------------------------------------------
  943. 6) Find all records where the cite_key field (of a user with a user ID '2')
  944. contains 'steffens', and append records in MODS XML format to file 'mods.xml'
  945. if they don't yet exist in that file:
  946. refbase -u=2 -c=steffens -F=mods -A=mods.xml
  947. --------------------------------------------------------------------------------
  948. 7) Find all records where the contribution_id field contains 'AWI' and where
  949. the keywords field contains 'seaweeds', and append records in BibTeX format
  950. to file 'paper.bib' if they don't yet exist in that file. In case found
  951. records already exist in file 'paper.bib', update them if their modification
  952. date is more recent:
  953. refbase -i=AWI -k=seaweeds -F=bibtex -A=paper.bib -B=1
  954. --------------------------------------------------------------------------------
  955. 8) Extract all citation IDs from file 'paper.aux', and append matching records
  956. (for a user with a user ID '2') in BibTeX format to file 'paper.bib' if they
  957. don't yet exist in that file. In case found records already exist in file
  958. 'paper.bib', update them if their modification date is more recent:
  959. refbase -u=2 -E=paper.aux -F=bibtex -A=paper.bib -B=1
  960. --------------------------------------------------------------------------------
  961. 9) Extract all citation IDs from file 'bibtex.bbl', and save matching records
  962. (for a user with a user ID '2') in LaTeX bibliography (.bbl) format to file
  963. 'refbase.bbl' using the "APA" citation style (the .bbl file generated by
  964. refbase can be used as a replacement of the BibTeX-generated .bbl file):
  965. refbase -u=2 -E=bibtex.bbl -F=latex_bbl -C=APA > refbase.bbl
  966. --------------------------------------------------------------------------------
  967. END_EXAMPLES
  968. exit $status;
  969. }
  970. __END__