1184 lines
		
	
	
		
			51 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
			
		
		
	
	
			1184 lines
		
	
	
		
			51 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| #!/usr/bin/perl
 | |
| 
 | |
| # Project:    Web Reference Database (refbase) <http://www.refbase.net>
 | |
| # Copyright:  Matthias Steffens <mailto:refbase@extracts.de> and the file's
 | |
| #             original author(s).
 | |
| #
 | |
| #             This code is distributed in the hope that it will be useful,
 | |
| #             but WITHOUT ANY WARRANTY. Please see the GNU General Public
 | |
| #             License for more details.
 | |
| #
 | |
| # File:       ./contrib/command_line/refbase
 | |
| # Repository: $HeadURL: file:///svn/p/refbase/code/branches/bleeding-edge/contrib/command_line/refbase $
 | |
| # Author(s):  Matthias Steffens <mailto:refbase@extracts.de>
 | |
| #
 | |
| # Created:    06-Jun-06, 18:00
 | |
| # Modified:   $Date: 2008-11-13 21:08:22 +0000 (Thu, 13 Nov 2008) $
 | |
| #             $Author: msteffens $
 | |
| #             $Revision: 1315 $
 | |
| 
 | |
| # REFBASE -- a refbase command line interface
 | |
| 
 | |
| # Purpose:    Perl script that allows to search a refbase online database from the command line and retrieve results in various formats
 | |
| # Usage:      refbase [OPTIONS]
 | |
| 
 | |
| # Help:       For help with the syntax type 'refbase -h'
 | |
| #             To view some usage examples type 'refbase -X'
 | |
| #             Further information is available at <http://cli.refbase.net/>
 | |
| 
 | |
| # Version:    1.2.1
 | |
| 
 | |
| # Requires:   - a shell with Perl execution capabilities
 | |
| #             - the Perl CPAN modules LWP::UserAgent, HTTP::Request::Common, HTTP::Response, HTTP::Cookies and URI::URL
 | |
| #             - access to a refbase database (refbase-0.9.0 or greater, refbase-0.9.5 required for '-A|--append' and '-B|--update' mode)
 | |
| 
 | |
| # Limits:     - Currently, this utility supports search & retrieve, but does not support update actions such as add, edit or delete.
 | |
| #             - This script is currently just an interface to 'show.php', which for example does not support arbitrary sort orders.
 | |
| #             - Specifying the record offset (using '-S|--start') as well as the number of records to be returned (using '-R|--rows') will
 | |
| #               only work for the formats 'html', 'rtf', 'pdf', 'latex', 'latex_bbl', 'markdown', 'ascii', 'atom', 'srw_dc' and 'srw_mods',
 | |
| #               since the other formats are designed to always export the entire result set. Note that for 'html', '--start' is adjusted to
 | |
| #               the next lower value that is an exact multiple of '--rows' (which ensures correct behaviour of the browse links).
 | |
| #             - The authentication mechanism is currently limited in that a given password will be transferred as parameter in the POST request
 | |
| #             - The '-A|--append' and '-B|--update' modes currently only work with formats 'bibtex', 'mods' and 'srw_mods'
 | |
| 
 | |
| # --------------------------------------------------------------------------------------------------------------
 | |
| 
 | |
| $version = "1.2.1";
 | |
| 
 | |
| # Configure variables:
 | |
| 
 | |
| # Specify the full URLs to any refbase servers that shall be queried:
 | |
| # Notes: - the given hash keys will work as shortcuts, e.g. '--host=local' would query
 | |
| #          your local refbase installation; one hash key must be named 'default',
 | |
| #          all other keys can be freely chosen
 | |
| #        - by default, the server labeled with key 'default' will be queried
 | |
| %hosts = (
 | |
| 				'default' => 'http://beta.refbase.net/',
 | |
| 				'local'   => 'http://localhost/refs/',
 | |
| 				'beta'    => 'http://beta.refbase.net/',
 | |
| 				'beta2'   => 'http://refbase.textdriven.com/beta/',
 | |
| 				'demo'    => 'http://demo.refbase.net/',
 | |
| 				'org'     => 'http://www.refbase.org/'
 | |
| );
 | |
| 
 | |
| # Specify the default values for all options that are not explicitly specified:
 | |
| %params = (
 | |
| 				# query options:
 | |
| 				'author'          => '',    # -a|--author
 | |
| 				'abstract'        => '',    # -b|--abstract
 | |
| 				'cite_key'        => '',    # -c|--citekey     => requires '--userid'
 | |
| 				'date'            => '',    # -d|--date
 | |
| 				'area'            => '',    # -e|--area
 | |
| 				'thesis'          => '',    # -f|--thesis
 | |
| 				'contribution_id' => '',    # -i|--contribid
 | |
| 				'abbrev_journal'  => '',    # -j|--journal
 | |
| 				'keywords'        => '',    # -k|--keywords
 | |
| 				'location'        => '',    # -l|--location
 | |
| 				'ismarked'        => '',    # -m|--marked      => requires '--userid'
 | |
| 				'notes'           => '',    # -n|--notes
 | |
| 				'publication'     => '',    # -p|--publication
 | |
| 				'queryType'       => 'and', # -q|--query       => multiple options will by default be connected with 'AND'
 | |
| 				'records'         => '',    # -r|--records
 | |
| 				'selected'        => '',    # -s|--selected    => requires '--userid'
 | |
| 				'title'           => '',    # -t|--title
 | |
| 				'userID'          => '',    # -u|--userid      => the user ID of your account at the refbase database you're querying
 | |
| 				'where'           => '',    # -w|--where
 | |
| 				'type'            => '',    # -x|--type
 | |
| 				'year'            => '',    # -y|--year
 | |
| 				'serial'          => '.+',  # -z|--serial      => the default '.+' causes all database records to be returned if only empty params are given (normally, you should leave this default as is)
 | |
| 
 | |
| 				# output options:
 | |
| 				'appendFile'      => '',        # -A|--append    => file to which search results will be appended
 | |
| 				'updateRecords'   => '0',       # -B|--update    => update existing records in '-A, --append' file; must be '0' (=no) or '1' (=yes)
 | |
| 				'citeStyle'       => '',        # -C|--style     => desired citation style, given name must match an entry within the database's MySQL table 'styles' (keep empty to use the database default)
 | |
| 				'extractFile'     => '',        # -E|--extract   => file from which citations will be extracted
 | |
| 				'format'          => 'ascii',   # -F|--format    => output format must be 'html', 'rtf', 'pdf', 'latex', 'latex_bbl', 'markdown', 'ascii', 'ads', 'bibtex', 'endnote', 'isi', 'ris', 'atom', 'mods', 'oai_dc', 'odf', 'srw_dc', 'srw_mods', 'word' or '' (the empty string '' will produce the default 'ascii' output style)
 | |
| 				'showLinks'       => '1',       # -L|--showlinks => hide/display links column in HTML output; must be '0', '1', or '' (the empty string '' will produce the default output style, i.e. print any links)
 | |
| 				'citeOrder'       => 'author',  # -O|--order     => cite order must be 'author', 'year', 'type', 'type-year', 'creation-date' or '' (the empty string '' will produce the default 'author' sort order)
 | |
| 				'showQuery'       => '0',       # -Q|--showquery => hide/display SQL query in ASCII output; must be '0', '1', or '' (the empty string '' will produce the default output style, i.e. not showing the SQL query)
 | |
| 				'showRows'        => '',        # -R|--rows      => desired number of search results (keep empty to use the database default)
 | |
| 				'startRecord'     => '1',       # -S|--start     => offset of the first search result, starting with one
 | |
| 				'viewType'        => 'web',     # -V|--view      => view type of HTML output; must be 'Web', 'Print', 'Mobile' or '' (the empty string '' will produce the default 'Web' output style)
 | |
| 
 | |
| 				# fixed parameters:
 | |
| 				'submit'          => 'Cite',    # display type for HTML output; must be 'Display', 'Cite', 'Export', or '' (the empty string '' will produce the default 'columnar' output style); this param's value will get adopted below based on the chosen '--format'
 | |
| 				'client'          => "cli-refbase-" . $version # the client ID of this command line utility
 | |
| 
 | |
| );
 | |
| 
 | |
| # Specify the default login credentials for a refbase user account:
 | |
| %loginParams = (
 | |
| 				'loginEmail'      => '', # -U|--user     -> the login email address of an existing refbase user
 | |
| 				'loginPassword'   => ''  # -P|--password -> the password for the given user account
 | |
| );
 | |
| 
 | |
| # Specify the location of the cookie jar file:
 | |
| # This file will be used to store & retrieve cookies
 | |
| $cookieJarFile = "$ENV{HOME}/.lwpcookies.txt";
 | |
| 
 | |
| # For '-A|--append' mode, specify whether all records in the given file
 | |
| # shall be sorted by cite key after any records have been appended/updated:
 | |
| $sortAppendFileData = 1; # must be '1' (=yes) or '0' (=no)
 | |
| 
 | |
| # For '-A|--append' mode, specify whether all appended/updated
 | |
| # records shall be reported as citations to STDOUT:
 | |
| $reportResults = 1; # must be '1' (=yes) or '0' (=no)
 | |
| 
 | |
| # For '-A|--append' mode, specify whether a backup file shall be created
 | |
| # before anything gets updated or appended to the given file:
 | |
| $backupAppendFile = 1; # must be '1' (=yes) or '0' (=no)
 | |
| 
 | |
| # By default, the backup file uses the given file name with a tilde character
 | |
| # added at the end of the file name. Adopt to your needs if necessary:
 | |
| $backupFileNameSuffix = "~";
 | |
| 
 | |
| # For '-E|--extract' mode, specify regular expression patterns that match the
 | |
| # citation IDs (i.e. refbase serial numbers or cite keys from the 'cite_key'
 | |
| # field) in the given file:
 | |
| 
 | |
| # For each recognized file name extension[*], there must be a triplet of code
 | |
| # (as shown below), each hash element contains an array with three elements:
 | |
| # 1) a regex pattern that matches the cite IDs in the file (e.g. '\\\\cite\{(.+?)\}')
 | |
| #    (note that the regex patterns will be applied using the 'msg' mode modifiers)
 | |
| # 2) the number of the sub-pattern that captures the cite IDs (e.g. '1')
 | |
| # 3) a regex split pattern that matches the delimiter(s) used between multiple cite IDs (e.g. '[, ]+')
 | |
| # [*]: The hash key must match the file's file name extension. If the given file is
 | |
| #      of unknown file type the 'default' pattern will be applied.
 | |
| %citeIDPatterns = (
 | |
| # - LaTeX, .tex file:
 | |
| 					'tex' => [  '\\\\(?:(?:no)?cite|cite(?:n|num|online)|cite(?:al)?[tp]\*?|cite(?:author\*?|year(?:par)?|text|[tp]alias))(?:\[.*?\])*\{(.+?)\}', # 1), matches e.g.: '\cite{...}', '\nocite{...}', '\cite[...]{...}' and cite commands from the 'cite' & 'natbib' packages
 | |
| 								'1', # 2)
 | |
| 								'[, ]+' ], # 3)
 | |
| 
 | |
| # - LaTeX, .aux file:
 | |
| 					'aux' => [  '\\\\(?:citation|bibcite)\{(.+?)\}', # matches '\citation{...}' or '\bibcite{...}'
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - LaTeX, .bib file:
 | |
| 					'bib' => [  '^[\t ]*@[A-Za-z]+\{(.+?),', # matches '@Article{...,' etc
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - LaTeX, .bbl file:
 | |
| 					'bbl' => [  '\\\\bibitem(?s:\[.*?\])*\{(.+?)\}', # matches '\bibitem{...}' or '\bibitem[...]{...}'
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - MODS, SRW_MODS, or Endnote XML file:
 | |
| #   TODO: add support for OAI_DC, SRW_DC and refbase OpenSearch Atom XML (<dc.identifier>citekey:...</dc.identifier>)
 | |
| 					'xml' => [  '(?:<mods.+?ID="|<identifier.+?type="citekey">|<(?:label|accession-num)>\s*<style.*?>\s*)(.+?)(?:</identifier>|\s*</style>\s*</(?:label|accession-num)>|")', # matches MODS IDs like '<mods ID="..."', '<mods version="..." ID="..."' or '<identifier type="citekey">...</identifier>' etc -OR- Endnote XML IDs, e.g. '<label><style face="normal" font="default" size="100%">...</style></label>' etc
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - Endnote tagged text, .enw file:
 | |
| 					'enw' => [  '^%F ([^\n\r]+)$', # matches '%F ...'
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - RIS, .ris file:
 | |
| 					'ris' => [  '^ID  - ([^\n\r]+)$', # matches 'ID  - ...'
 | |
| 								'1',
 | |
| 								'[, ]+' ],
 | |
| 
 | |
| # - Generic, extracts IDs from lists of comma-separated refbase serial numbers (or cite keys) that are enclosed by braces:
 | |
| 					'default' => [  '\{([^\n\r]+?)\}', # matches e.g.: '{123}', '{1,12,33}', '{Arrigo+Thomas2004}' or '{Arrigo+Thomas2004,Assur1958}'
 | |
| 									'1',
 | |
| 									'[,]+' ],
 | |
| );
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # NOTE: You shouldn't need to change anything below this line
 | |
| 
 | |
| # CPAN modules:
 | |
| use LWP::UserAgent; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/LWP/UserAgent.pm>
 | |
| use HTTP::Request::Common; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Request/Common.pm>
 | |
| use HTTP::Response; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Response.pm>
 | |
| use HTTP::Cookies; # more info: <http://search.cpan.org/~gaas/libwww-perl-5.805/lib/HTTP/Cookies.pm>
 | |
| use URI::URL; # more info: <http://search.cpan.org/~gaas/URI-1.35/URI/URL.pm>
 | |
| 
 | |
| # standard modules:
 | |
| use Time::Local;
 | |
| 
 | |
| # initialize variables:
 | |
| $host = $hosts{'default'};
 | |
| $optCt = 0;
 | |
| $format = '';
 | |
| @appendFileSerials = ();
 | |
| $updateRecords = 0;
 | |
| @extractFileSerials = ();
 | |
| @extractFileKeys = ();
 | |
| 
 | |
| %months = (
 | |
| 				'Jan' => 0,
 | |
| 				'Feb' => 1,
 | |
| 				'Mar' => 2,
 | |
| 				'Apr' => 3,
 | |
| 				'May' => 4,
 | |
| 				'Jun' => 5,
 | |
| 				'Jul' => 6,
 | |
| 				'Aug' => 7,
 | |
| 				'Sep' => 8,
 | |
| 				'Oct' => 9,
 | |
| 				'Nov' => 10,
 | |
| 				'Dec' => 11
 | |
| );
 | |
| 
 | |
| # Extract options:
 | |
| # TODO: use Getopt::Long
 | |
| 
 | |
| # general options:
 | |
| if (($ARGV[0] eq '--help') or ($ARGV[0] eq '-h') or ($ARGV[0] eq '')) { &usage (0); } # if the user asked for --help/-h or didn't provide any input, call the 'usage' subroutine
 | |
| elsif (($ARGV[0] eq '--version') or ($ARGV[0] eq '-v')) { &version (0); } # show version information
 | |
| elsif (($ARGV[0] eq '--examples') or ($ARGV[0] eq '-X')) { &examples (0); } # print some usage examples
 | |
| 
 | |
| else {
 | |
| 	foreach (@ARGV) {
 | |
| 		# extract query options:
 | |
| 		if ($_ =~ /^(?:-a|--author)=(.+)$/) { $params{'author'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-b|--abstract)=(.+)$/) { $params{'abstract'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-c|--citekey)=(.+)$/) { $params{'cite_key'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-d|--date)=(.+)$/) { $params{'date'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-e|--area)=(.+)$/) { $params{'area'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-f|--thesis)=(.+)$/) { $params{'thesis'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-i|--contribid)=(.+)$/) { $params{'contribution_id'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-j|--journal)=(.+)$/) { $params{'abbrev_journal'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-k|--keywords)=(.+)$/) { $params{'keywords'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-l|--location)=(.+)$/) { $params{'location'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-m|--marked)=(.+)$/) { $params{'ismarked'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-n|--notes)=(.+)$/) { $params{'notes'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-p|--publication)=(.+)$/) { $params{'publication'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-q|--query)=(.+)$/) { $params{'queryType'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-r|--records)=(.+)$/) { $params{'records'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-s|--selected)=(.+)$/) { $params{'selected'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-t|--title)=(.+)$/) { $params{'title'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-u|--userid)=(.+)$/) { $params{'userID'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-w|--where)=(.+)$/) { $params{'where'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-x|--type)=(.+)$/) { $params{'type'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-y|--year)=(.+)$/) { $params{'year'} = $1; $optCt++; }
 | |
| 		elsif ($_ =~ /^(?:-z|--serial)=(.+)$/) { $params{'serial'} = $1; }
 | |
| 
 | |
| 		# extract output options:
 | |
| 		elsif ($_ =~ /^(?:-A|--append)=(.+)$/) { $params{'appendFile'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-B|--update)=(.+)$/) { $params{'updateRecords'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-C|--style)=(.+)$/) { $params{'citeStyle'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-E|--extract)=(.+)$/) { $params{'extractFile'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-F|--format)=(.+)$/) { $params{'format'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-L|--showlinks)=(.+)$/) { $params{'showLinks'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-O|--order)=(.+)$/) { $params{'citeOrder'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-Q|--showquery)=(.+)$/) { $params{'showQuery'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-R|--rows)=(.+)$/) { $params{'showRows'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-S|--start)=(.+)$/) { $params{'startRecord'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-V|--view)=(.+)$/) { $params{'viewType'} = $1; }
 | |
| 
 | |
| 		# extract server options:
 | |
| 		elsif ($_ =~ /^(?:-H|--host)=(.+)$/) { $host = $1; }
 | |
| 		elsif ($_ =~ /^(?:-P|--password)=(.+)$/) { $loginParams{'loginPassword'} = $1; }
 | |
| 		elsif ($_ =~ /^(?:-U|--user)=(.+)$/) { $loginParams{'loginEmail'} = $1; }
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # resolve any host shortcuts:
 | |
| if (exists($hosts{$host})) {
 | |
| 	$host = $hosts{$host};
 | |
| }
 | |
| elsif ($host !~ /^https?:\/\//i) {
 | |
| 	$host = $hosts{'default'}; # can't resolve given host, reset back to default
 | |
| }
 | |
| 
 | |
| # if any query option other than the 'serial' parameter was explicitly set,
 | |
| # remove any default '.+' value from the 'serial' parameter:
 | |
| # (otherwise an 'OR' query would always match everything)
 | |
| if (($optCt > 0) && ($params{'serial'} eq '.+')) {
 | |
| 	# if '--citekey', '--selected' or '--marked' is given, '--userid' must be specified as well; i.e.,
 | |
| 	# in case of these user-specific params, we'll only empty the 'serial' param if a user ID is present
 | |
| 	if (($params{'cite_key'} eq '') && ($params{'selected'} eq '') && ($params{'ismarked'} eq '')) {
 | |
| 		$params{'serial'} = '';
 | |
| 	}
 | |
| 	elsif ($params{'userID'} ne '') { # at least one of '--citekey', '--selected' or '--marked' was given together with a '--userid'
 | |
| 		$params{'serial'} = '';
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # assign correct URL params based on the '-F|--format' option:
 | |
| if (exists($params{'format'})) {
 | |
| 	$format = $params{'format'};
 | |
| 	if ($format =~ /^(rtf|pdf|latex|latex_bbl|markdown|ascii)$/i) {
 | |
| 		$params{'submit'} = "Cite";
 | |
| 	}
 | |
| 	if ($format =~ /^(html|rtf|pdf|latex|latex_bbl|markdown|ascii)$/i) {
 | |
| 		$format =~ s/^latex_bbl$/LaTeX .bbl/i;
 | |
| 		$params{'citeType'} = $format;
 | |
| 	}
 | |
| 	elsif ($format =~ /^(ads|bibtex|endnote|isi|ris|atom|mods|oai_dc|odf|srw(_dc|_mods)?|word)$/i) {
 | |
| 		$params{'submit'} = "Export";
 | |
| 		$params{'exportType'} = "file";
 | |
| 
 | |
| 		if ($format =~ /^(ads|bibtex|endnote|isi|ris)$/i) {
 | |
| 			$params{'exportFormat'} = $format;
 | |
| 		}
 | |
| 		elsif ($format =~ /^(atom|mods|oai_dc|odf|srw(_dc|_mods)?|word)$/i) {
 | |
| 			$params{'exportFormat'} = $format . " xml";
 | |
| 		}
 | |
| 	}
 | |
| 	else {
 | |
| 		$params{'citeType'} = "ascii";
 | |
| 	}
 | |
| 
 | |
| 	delete($params{'format'});
 | |
| }
 | |
| 
 | |
| # '-E|--extract' functionality:
 | |
| if ($params{'extractFile'} ne '') {
 | |
| 	$extractFile = $params{'extractFile'};
 | |
| 	# remove 'extractFile' parameter (which we don't need to send to the refbase server):
 | |
| 	delete($params{'extractFile'});
 | |
| 
 | |
| 	# check if the given file exists and is readable:
 | |
| 	if (!(-r $extractFile)) {
 | |
| 		print "The '-E|--extract' option requires a name/path to an existing file.\n\n";
 | |
| 		exit;
 | |
| 	}
 | |
| 	else { 	# the given '$extractFile' exists and is readable
 | |
| 		$fileExtension = "default"; # triggers default regex patterns for extraction of cite IDs
 | |
| 		# extract any file name extension:
 | |
| 		if ($extractFile =~ /\.([^.\n]+)$/) {
 | |
| 			if (exists($citeIDPatterns{$1})) { # if '$extractFile' has a recognized file name extension
 | |
| 				$fileExtension = $1; # use file-specific regex patterns for extraction of cite IDs
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		$citeIDRegex = @{$citeIDPatterns{$fileExtension}}[0]; # get regex pattern that matches the cite IDs in '$extractFile'
 | |
| 		$citeIDNum   = @{$citeIDPatterns{$fileExtension}}[1]; # get number of the sub-pattern that captures the cite IDs
 | |
| 		$citeIDSplit = @{$citeIDPatterns{$fileExtension}}[2]; # get regex split pattern that matches the delimiter(s) used between multiple cite IDs
 | |
| 
 | |
| 		# open '$extractFile' in read mode:
 | |
| 		open(KEYS, "<", $extractFile) || die "Can't open file '" . $extractFile . "': $!\n";
 | |
| 
 | |
| 		# read the entire file at once:
 | |
| 		undef $/;
 | |
| 		$extractFileString = <KEYS>;
 | |
| 
 | |
| 		# close '$extractFile':
 | |
| 		close(KEYS) || die "Can't close file: $!\n";
 | |
| 
 | |
| 		# extract all refbase serial numbers or cite keys that exist in '$extractFile':
 | |
| 		while ($extractFileString =~ /$citeIDRegex/msg) {
 | |
| 			$citeID = $$citeIDNum;
 | |
| 			if ($citeID =~ /^(?:\d+|$citeIDSplit)+$/) { # '$citeID' is assumed to be a refbase serial number (or a list of multiple serials)
 | |
| 				while ($citeID =~ /(\d+)/g) {
 | |
| 					push(@extractFileSerials, $1);
 | |
| 				}
 | |
| 			}
 | |
| 			else { # '$citeID' is assumed to be a cite key (or a list of multiple cite keys)
 | |
| 				push(@extractFileKeys, split(/$citeIDSplit/, $citeID));
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		# remove any duplicate cite IDs:
 | |
| 		@extractFileSerials = &uniquify(@extractFileSerials);
 | |
| 		@extractFileKeys = &uniquify(@extractFileKeys);
 | |
| 
 | |
| 		# add query clause to restrict search results to records existing in '$extractFile':
 | |
| 		if (@extractFileSerials || @extractFileKeys) {
 | |
| 			if ($params{'where'} ne '') { $params{'where'} .= " AND "; }
 | |
| 			$params{'where'} .= "(";
 | |
| 			if (@extractFileSerials) {
 | |
| 				$params{'where'} .= 'serial RLIKE "^(' . join('|', @extractFileSerials) . ')$"';
 | |
| 			}
 | |
| 			if (@extractFileKeys) {
 | |
| 				if (@extractFileSerials) { $params{'where'} .= " OR "; }
 | |
| 				$params{'where'} .= 'cite_key RLIKE "^(' . join('|', map {quotemeta(quotemeta($_))} @extractFileKeys) . ')$"'; # quotes each key before joining them
 | |
| 			}
 | |
| 			$params{'where'} .= ")";
 | |
| 		}
 | |
| 		else { # no citation IDs could be extracted from '$extractFile'
 | |
| 			print "No citation IDs were found in file '" . $extractFile . "'!\n\n";
 | |
| 			exit;
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # '-A|--append' mode:
 | |
| if ($params{'appendFile'} ne '') {
 | |
| 	# '-A|--append' mode currently only works with formats 'bibtex', 'mods' and 'srw_mods',
 | |
| 	# for all other formats (or if no format was specified), we'll return an error message:
 | |
| 	if ($format !~ /^(bibtex|mods|srw(_mods)?)$/i) {
 | |
| 		print "The '-A, --append' option requires '-F, --format' set to 'bibtex', 'mods' or 'srw_mods'.\n\n";
 | |
| 		exit;
 | |
| 	}
 | |
| 
 | |
| 	$appendFile = $params{'appendFile'};
 | |
| 	# remove 'appendFile' parameter (which we don't need to send to the refbase server):
 | |
| 	delete($params{'appendFile'});
 | |
| 
 | |
| 	$appendFileString = '';
 | |
| 
 | |
| 	# if the given '$appendFile' exists and is readable:
 | |
| 	if (-r $appendFile) {
 | |
| 		# open '$appendFile' in read mode:
 | |
| 		open(FILEIN, "<", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
 | |
| 
 | |
| 		# read the entire file at once:
 | |
| 		undef $/;
 | |
| 		$appendFileString = <FILEIN>;
 | |
| 
 | |
| 		# extract all refbase serial numbers (which already exist in '$appendFile') into an array:
 | |
| 		while ($appendFileString =~ /(?<=show\.php\?record=)(\d+)/g) {
 | |
| 			push(@appendFileSerials, $1);
 | |
| 		}
 | |
| 
 | |
| 		# close '$appendFile':
 | |
| 		close(FILEIN) || die "Can't close file: $!\n";
 | |
| 
 | |
| 		# backup existing contents of '$appendFile':
 | |
| 		# (note that, currently, this will overwrite contents of any existing backup file
 | |
| 		#  with the same  backup file name, even if nothing gets updated/appended further down)
 | |
| 		if ($backupAppendFile) {
 | |
| 			# create suitable backup file name:
 | |
| 			if ($appendFile =~ /\.([^.\n]+)$/) { # if the file has a file name extension
 | |
| 				($appendFileBackup = $appendFile) =~ s/\.([^.\n]+)$/$backupFileNameSuffix.$1/; # add backup suffix in front of the file name extension
 | |
| 			}
 | |
| 			else {
 | |
| 				$appendFileBackup = $appendFile . $backupFileNameSuffix; # add backup suffix at end of file
 | |
| 			}
 | |
| 
 | |
| 			# open backup file in write mode (creates new file if it doesn't exist):
 | |
| 			open(FILEBACKUP, ">", $appendFileBackup) || die "Can't open file '" . $appendFileBackup . "': $!\n";
 | |
| 
 | |
| 			# write existing contents of '$appendFile' to backup file (replacing any previous contents):
 | |
| 			print FILEBACKUP $appendFileString;
 | |
| 
 | |
| 			# close backup file:
 | |
| 			close(FILEBACKUP) || die "Can't close file: $!\n";
 | |
| 		}
 | |
| 
 | |
| 		# add query clause to exclude existing records from search results:
 | |
| 		if (@appendFileSerials) {
 | |
| 			# save current contents of the '-w|--where' option before messing with it further:
 | |
| 			# (it will be needed in the 'append' and 'update' subroutines below)
 | |
| 			$where = $params{'where'};
 | |
| 
 | |
| 			if ($params{'where'} ne '') { $params{'where'} .= " AND "; }
 | |
| 			$params{'where'} .= 'serial NOT RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
 | |
| 		}
 | |
| 	}
 | |
| 	# if no '$appendFile' exists, it will be created by the 'append' subroutine below
 | |
| }
 | |
| 
 | |
| # '-B|--update' functionality:
 | |
| if ($params{'updateRecords'} == 1) {
 | |
| 	$updateRecords = 1;
 | |
| }
 | |
| # remove 'updateRecords' parameter (which we don't need to send to the refbase server):
 | |
| delete($params{'updateRecords'});
 | |
| 
 | |
| # for HTML output, we'll adjust the display type if the '-r|--records' option contains a single record serial number:
 | |
| if (($params{'citeType'} eq 'html') && ($params{'records'} =~ /^[0-9]+$/)) {
 | |
| 	$params{'submit'} = "Display";
 | |
| }
 | |
| 
 | |
| # NOTE: I tried to put all query-related code into a dedicated function but for
 | |
| #       some reason that didn't work with redirects. ?:-/
 | |
| 
 | |
| # initialize new user agent:
 | |
| # (uses LWP::UserAgent)
 | |
| $userAgent = LWP::UserAgent->new;
 | |
| 
 | |
| # set user agent string:
 | |
| $userAgent->agent("refbase/" . $version . " (http://cli.refbase.net/) ");
 | |
| 
 | |
| # allow redirection for 'POST' requests:
 | |
| # (by default, the list of request names that '$userAgent->redirect_ok(...)'
 | |
| #  will allow redirection for is only set to ['GET', 'HEAD'], as per RFC 2616)
 | |
| push @{ $userAgent->requests_redirectable }, 'POST';
 | |
| 
 | |
| # set cookie jar object:
 | |
| # LWP will collect cookies and respond to cookie requests via its cookie jar, thus
 | |
| # enabling the user agent to fetch a PHP session ID from the refbase login response
 | |
| # and automatically resend it upon next request
 | |
| $userAgent->cookie_jar({ file => $cookieJarFile, autosave => 1 });
 | |
| 
 | |
| # attempt to authenticate using the given login credentials:
 | |
| if (($loginParams{'loginEmail'} ne '') && ($loginParams{'loginPassword'} ne '')) {
 | |
| 	$loginSuccessful = &login(0); # call the 'login' subroutine
 | |
| }
 | |
| 
 | |
| # construct URL:
 | |
| # (uses URI::URL)
 | |
| $script = "show.php";
 | |
| $url = url($host . $script);
 | |
| 
 | |
| # build and send GET/POST request:
 | |
| # (uses HTTP::Request::Common & HTTP::Response)
 | |
| 
 | |
| # build POST request using the 'application/x-www-form-urlencoded' content type:
 | |
| $request = POST $url, \%params;
 | |
| 
 | |
| # or, build POST request using the 'multipart/form-data' content type:
 | |
| # $request = POST $url, Content_Type => 'form-data', Content => \%params;
 | |
| 
 | |
| # alternatively, build GET request:
 | |
| # (note that for large URLs, a GET request may cause an "414 Request-URI Too Large" error)
 | |
| # $url->query_form(%params);
 | |
| # $request = GET $url;
 | |
| 
 | |
| # print $request->as_string(); # DEBUG (dumps the GET/POST request)
 | |
| 
 | |
| # send the GET/POST request:
 | |
| $response = $userAgent->request($request); # or use: $response = $userAgent->get($url);
 | |
| 
 | |
| # print returned results:
 | |
| if ($response->is_error()) { # if the request fails, print error message to STDERR:
 | |
| 	print STDERR $response->status_line, "\n";
 | |
| }
 | |
| 
 | |
| elsif (defined($appendFile)) { # if an '$appendFile' was given print results to FILE:
 | |
| 	if ($updateRecords == 1 && @appendFileSerials) {
 | |
| 		&update(0); # update existing records in '$appendFile' with found results
 | |
| 	}
 | |
| 
 | |
| 	$resultsString = $response->content();
 | |
| 
 | |
| 	# TODO: it would be better to use function 'splitRecs' to check whether there
 | |
| 	#       were any results returned for the given format
 | |
| 	if ($resultsString =~ /^Nothing found!\n+/i) {
 | |
| 		$resultsString = '';
 | |
| 	}
 | |
| 
 | |
| 	if (($resultsString eq '') || (($format =~ /^(mods|srw(_mods)?)$/i) && ($resultsString !~ /<mods[ >]/i))) {
 | |
| 		print "There are no records that need to be added to file '" . $appendFile . "'.\n\n";
 | |
| 	}
 | |
| 	else { # if the query returned results which aren't yet in '$appendFile':
 | |
| 		&append(0); # append search results to file
 | |
| 	}
 | |
| }
 | |
| 
 | |
| else { # print results to STDOUT:
 | |
| 	binmode STDOUT;
 | |
| 	print $response->content();
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Login with login credentials given in '%loginParams':
 | |
| sub login
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 
 | |
| 	# construct URL:
 | |
| 	# (uses URI::URL)
 | |
| 	$loginScript = "user_login.php";
 | |
| 	$loginURL = url($host . $loginScript);
 | |
| 
 | |
| 	# send POST request:
 | |
| 	# (uses HTTP::Request::Common & HTTP::Response)
 | |
| 	$loginRequest = POST $loginURL, \%loginParams;
 | |
| 	$loginResponse = $userAgent->request($loginRequest);
 | |
| 
 | |
| 	if ($loginResponse->is_error()) {
 | |
| 		print STDERR $loginResponse->status_line, "\n";
 | |
| 		exit $status;
 | |
| 	}
 | |
| 	else {
 | |
| 		$location = $loginResponse->header('Location');
 | |
| 
 | |
| 		# upon successful login, refbase will redirect to 'index.php'
 | |
| 		if ($location =~ /index.php/) {
 | |
| 			return 1; # login successful
 | |
| 		}
 | |
| 		else {
 | |
| 			return 0; # login NOT successful
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Append search results to file:
 | |
| sub append
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 	local @newAppendFileData = ();
 | |
| 	$whereAppend = "";
 | |
| 
 | |
| 	# TODO: In order to enable sorting of all records for MODS + SRW, we should remove the
 | |
| 	#       XML file header & footer from the results (and store them in a variable). Then
 | |
| 	#       merge existing & new records, split, sort, join again, and finally put the XML
 | |
| 	#       header & footer back.
 | |
| 	if ($format =~ /^(mods|srw(_mods)?)$/i) {
 | |
| 		# if the '$appendFile' contains MODS XML data:
 | |
| 		if (($format =~ /^mods$/i) && ($appendFileString =~ /<modsCollection/i)) {
 | |
| 			# remove XML file header and '<modsCollection>' opening tag from results:
 | |
| 			$resultsString =~ s/^<\?xml.+?<modsCollection[^>\n]*?>\n//ims;
 | |
| 			# remove '</modsCollection>' closing tag from existing records in '$appendFile':
 | |
| 			$appendFileString =~ s/^<\/modsCollection>//ims;
 | |
| 		}
 | |
| 		# if the '$appendFile' contains SRW_MODS XML data:
 | |
| 		elsif (($format =~ /^srw(_mods)?$/i) && ($appendFileString =~ /<srw:records/i)) {
 | |
| 			# remove XML file header and opening tags from results:
 | |
| 			$resultsString =~ s/^<\?xml.+?<srw:records[^>\n]*?>\n//ims;
 | |
| 			# remove closing tags from existing records in '$appendFile':
 | |
| 			$appendFileString =~ s/^\s*<\/srw:records>.*?<\/srw:searchRetrieveResponse>//ims;
 | |
| 
 | |
| 			# TODO: update values in '<srw:numberOfRecords>' and '<srw:recordPosition>'
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	# append search results to the '$appendFile' contents:
 | |
| 	$newAppendFileString = $appendFileString;
 | |
| 	if (!($sortAppendFileData) && ($format =~ /^bibtex$/i) && ($appendFileString ne '') && ($resultsString ne '')) {
 | |
| 		$newAppendFileString .= "\n";
 | |
| 	}
 | |
| 	$newAppendFileString .= $resultsString;
 | |
| 
 | |
| 	# sort all records by cite key:
 | |
| 	if ($sortAppendFileData) {
 | |
| 		# remove any newlines from end of string:
 | |
| 		$newAppendFileString =~ s/[\n\r]+$//;
 | |
| 
 | |
| 		# split '$appendFile' contents & search results on (format-specific) record delimiters:
 | |
| 		@newAppendFileData = &splitRecs($newAppendFileString);
 | |
| 
 | |
| 		# sort array of records by cite key:
 | |
| 		@newAppendFileData = &sortRecs(@newAppendFileData);
 | |
| 
 | |
| 		# merge again records into a string:
 | |
| 		if ($format =~ /^bibtex$/i) {
 | |
| 			$recDelim = "\n\n";
 | |
| 		}
 | |
| 		else {
 | |
| 			$recDelim = "\n";
 | |
| 		}
 | |
| 		$newAppendFileString = join($recDelim, @newAppendFileData) . "\n";
 | |
| 	}
 | |
| 
 | |
| 	# open '$appendFile' in write mode (creates new file if it doesn't exist):
 | |
| 	open(FILEADD, ">", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
 | |
| 
 | |
| 	# write existing contents & new results back to '$appendFile':
 | |
| 	print FILEADD $newAppendFileString;
 | |
| 
 | |
| 	# close '$appendFile':
 | |
| 	close(FILEADD) || die "Can't close file: $!\n";
 | |
| 
 | |
| 	# to give some feedback, we output all appended records as citations to STDOUT:
 | |
| 	# TODO: move into a dedicated function if possible (compare with 'update' subroutine)
 | |
| 	if ($reportResults) {
 | |
| 		$params{'submit'} = "Cite";
 | |
| 		$params{'citeType'} = "ascii";
 | |
| 		$params{'showRows'} = "99999";
 | |
| 		$params{'headerMsg'} = "Added records:";
 | |
| 
 | |
| 		if ($where ne '') { $whereAppend = $where . " AND "; }
 | |
| 
 | |
| 		if (($appendFileString ne '') && (@appendFileSerials)) { # '$appendFile' contains some records
 | |
| 			# add query clause to exclude existing records from search results:
 | |
| 			$whereAppend .= 'serial NOT RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
 | |
| 		}
 | |
| 		else { # '$appendFile' didn't exist or was empty
 | |
| 			# extract all refbase serial numbers from '$resultsString' into an array:
 | |
| 			while ($resultsString =~ /(?<=show\.php\?record=)(\d+)/g) {
 | |
| 				push(@appendFileSerials, $1);
 | |
| 			}
 | |
| 			# add query clause to display all records from the current search results:
 | |
| 			$whereAppend .= 'serial RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
 | |
| 		}
 | |
| 
 | |
| 		$params{'where'} = $whereAppend;
 | |
| 
 | |
| 		# construct URL:
 | |
| 		# (uses URI::URL)
 | |
| 		$appendFeedbackURL = url($host . $script);
 | |
| 
 | |
| 		# send POST request:
 | |
| 		# (uses HTTP::Request::Common & HTTP::Response)
 | |
| 		$appendFeedbackRequest = POST $appendFeedbackURL, \%params;
 | |
| 		$appendFeedbackResponse = $userAgent->request($appendFeedbackRequest);
 | |
| 
 | |
| 		print $appendFeedbackResponse->content();
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Update existing records (given in '$appendFile') with found results:
 | |
| sub update
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 	local @appendFileData = ();
 | |
| 	local @newAppendFileData = ();
 | |
| 	@updatedSerials = ();
 | |
| 	$whereUpdate = "";
 | |
| 
 | |
| 	# add query clause to restrict search results to records existing in '$appendFile':
 | |
| 	if ($where ne '') { $whereUpdate = $where . " AND "; }
 | |
| 	$whereUpdate .= 'serial RLIKE "^(' . join('|', @appendFileSerials) . ')$"';
 | |
| 	$params{'where'} = $whereUpdate;
 | |
| 
 | |
| 	# fetch all records that match the given query AND which exist in '$appendFile':
 | |
| 
 | |
| 	# construct URL:
 | |
| 	# (uses URI::URL)
 | |
| 	$updateURL = url($host . $script);
 | |
| 
 | |
| 	# send POST request:
 | |
| 	# (uses HTTP::Request::Common & HTTP::Response)
 | |
| 	$updateRequest = POST $updateURL, \%params;
 | |
| 	$updateResponse = $userAgent->request($updateRequest);
 | |
| 
 | |
| 	$updateResultsString = $updateResponse->content();
 | |
| 
 | |
| 	# if the '$appendFile' contains MODS XML data:
 | |
| 	if (($format =~ /^mods$/i) && ($appendFileString =~ /<modsCollection/i)) {
 | |
| 		# remove XML file header and '<modsCollection>' opening tag from results:
 | |
| 		$updateResultsString =~ s/^<\?xml.+?<modsCollection[^>\n]*?>\n//ims;
 | |
| 		# remove '</modsCollection>' closing tag from results:
 | |
| 		$updateResultsString =~ s/^<\/modsCollection>//ims;
 | |
| 	}
 | |
| 	# if the '$appendFile' contains SRW_MODS XML data:
 | |
| 	elsif (($format =~ /^srw(_mods)?$/i) && ($appendFileString =~ /<srw:records/i)) {
 | |
| 		# remove XML file header and opening tags from results:
 | |
| 		$updateResultsString =~ s/^<\?xml.+?<srw:records[^>\n]*?>\n//ims;
 | |
| 		# remove closing tags from results:
 | |
| 		$updateResultsString =~ s/^\s*<\/srw:records>.*?<\/srw:searchRetrieveResponse>//ims;
 | |
| 
 | |
| 		# TODO: update values in '<srw:numberOfRecords>' and '<srw:recordPosition>'
 | |
| 	}
 | |
| 
 | |
| 	# split search results on (format-specific) record delimiters:
 | |
| 	@updateResultsData = &splitRecs($updateResultsString);
 | |
| 
 | |
| 	# extract refbase serial number & UNIX time stamp from returned records:
 | |
| 	%updateResultsRecords = &parseRecs(@updateResultsData);
 | |
| 
 | |
| 	# split '$appendFile' contents on (format-specific) record delimiters:
 | |
| 	@appendFileData = &splitRecs($appendFileString);
 | |
| 
 | |
| 	# replace existing records with new ones:
 | |
| 	foreach $record (@appendFileData) { # for each of the records existing in '$appendFile'
 | |
| 		# NOTE: instead of looping over all existing records, it would probably be more effective
 | |
| 		#       to loop over each of the newly fetched (updated) records instead, and directly replace
 | |
| 		#       all records in '$appendFileString' that have a matching serial number, but I couldn't
 | |
| 		#       get this working correctly...
 | |
| 
 | |
| 		# if this record contains a refbase serial number & UNIX time stamp:
 | |
| 		if ($record =~ /(?<=show\.php\?record=)(\d+)[^\d\n]+(\d{2}) ([[:alpha:]]{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2})/i) {
 | |
| 			$serial = $1;
 | |
| 
 | |
| 			if (exists($updateResultsRecords{$serial})) { # if one of the newly fetched (updated) records has an identical serial number
 | |
| 				# extract time stamp from existing record:
 | |
| 				$monthday = $2;
 | |
| 				$month = $months{$3}; # 0 == January
 | |
| 				$year = $4 - 1900; # by default, Perl counts years from 1900
 | |
| 				$hour = $5;
 | |
| 				$min = $6;
 | |
| 				$sec = $7;
 | |
| 
 | |
| 				# convert existing record's time stamp to epoch seconds:
 | |
| 				$modifiedDateExisting = timelocal($sec, $min, $hour, $monthday, $month, $year); # uses Time::Local
 | |
| 
 | |
| 				# extract source data & time stamp from the matching updated record:
 | |
| 				$updatedRecord = @{$updateResultsRecords{$serial}}[0];
 | |
| 				$modifiedDateUpdated = @{$updateResultsRecords{$serial}}[1];
 | |
| 
 | |
| 				if ($modifiedDateUpdated > $modifiedDateExisting) { # if the updated record has a more recent time stamp
 | |
| 					# replace existing record with updated record:
 | |
| 					# (variables '$recStart' and '$recEnd' get defined in subroutine 'splitRecs')
 | |
| 					$record =~ s/^$recStart.+?exported from refbase.+?show\.php\?record=$serial.+?$recEnd$/$updatedRecord/ims;
 | |
| 
 | |
| 					push(@updatedSerials, $serial);
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		push(@newAppendFileData, $record);
 | |
| 	}
 | |
| 
 | |
| 	if (! @updatedSerials) {
 | |
| 		print "There are no records that need to be updated in file '" . $appendFile . "'.\n\n";
 | |
| 	}
 | |
| 	else {
 | |
| 		# update existing records in variable '$appendFileString':
 | |
| 		if ($format =~ /^bibtex$/i) {
 | |
| 			$recDelim = "\n\n";
 | |
| 		}
 | |
| 		else {
 | |
| 			$recDelim = "\n";
 | |
| 		}
 | |
| 		$appendFileString = join($recDelim, @newAppendFileData);
 | |
| 
 | |
| 		# if the initial query did not return results which aren't yet in '$appendFile':
 | |
| 		# (i.e. if the 'append' subroutine won't get triggered)
 | |
| 		if (($resultsString eq '') || (($format =~ /^(mods|srw(_mods)?)$/i) && ($resultsString !~ /<mods[ >]/i))) {
 | |
| 			# open '$appendFile' in write mode:
 | |
| 			open(FILEOUT, ">", $appendFile) || die "Can't open file '" . $appendFile . "': $!\n";
 | |
| 
 | |
| 			# write back updated file:
 | |
| 			print FILEOUT $appendFileString;
 | |
| 
 | |
| 			# close '$appendFile':
 | |
| 			close(FILEOUT) || die "Can't close file: $!\n";
 | |
| 		}
 | |
| 		# otherwise, the 'append' subroutine will write contents of '$appendFileString' back to '$appendFile'
 | |
| 
 | |
| 		# to give some feedback, we output all updated records as citations to STDOUT:
 | |
| 		# TODO: move into a dedicated function if possible (compare with 'append' subroutine)
 | |
| 		if ($reportResults) {
 | |
| 			$params{'submit'} = "Cite";
 | |
| 			$params{'citeType'} = "ascii";
 | |
| 			$params{'showRows'} = "99999";
 | |
| 			$params{'headerMsg'} = "Updated records:";
 | |
| 			$whereUpdate = "";
 | |
| 
 | |
| 			# add query clause to restrict search results to updated records:
 | |
| 			if ($where ne '') { $whereUpdate = $where . " AND "; }
 | |
| 			$whereUpdate .= 'serial RLIKE "^(' . join('|', @updatedSerials) . ')$"';
 | |
| 			$params{'where'} = $whereUpdate;
 | |
| 
 | |
| 			# construct URL:
 | |
| 			# (uses URI::URL)
 | |
| 			$updateFeedbackURL = url($host . $script);
 | |
| 
 | |
| 			# send POST request:
 | |
| 			# (uses HTTP::Request::Common & HTTP::Response)
 | |
| 			$updateFeedbackRequest = POST $updateFeedbackURL, \%params;
 | |
| 			$updateFeedbackResponse = $userAgent->request($updateFeedbackRequest);
 | |
| 
 | |
| 			print $updateFeedbackResponse->content();
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Split '$sourceText' on format-specific record delimiters:
 | |
| # (note that, currently, only formats 'bibtex', 'mods' and 'srw_mods' are supported)
 | |
| sub splitRecs
 | |
| {
 | |
| 	local ($sourceText) = @_;
 | |
| 	local (@records) = ();
 | |
| 
 | |
| 	if ($format =~ /^(bibtex|mods|srw(_mods)?)$/i) {
 | |
| 		# define format-specific strings that open/close a record:
 | |
| 		# (note that '...' must be used here instead of "...", see Friedl regex book, 1st ed., p.300+)
 | |
| 		if ($format =~ /^bibtex$/i) {
 | |
| 			$recStart = '@';
 | |
| 			$recEnd = '\}';
 | |
| 		}
 | |
| 		elsif ($format =~ /^mods$/i) {
 | |
| 			$recStart = '\s*<mods[ >]';
 | |
| 			$recEnd = 'mods>';
 | |
| 		}
 | |
| 		elsif ($format =~ /^srw(_mods)?$/i) {
 | |
| 			$recStart = '\s*<srw:record[ >]';
 | |
| 			$recEnd = 'srw:record>';
 | |
| 		}
 | |
| 
 | |
| 		@records = split(/(?<=$recEnd)\s*\n(?=$recStart)/m, $sourceText);
 | |
| 	}
 | |
| 
 | |
| 	return @records;
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Extract records containing a refbase serial number & UNIX time stamp from '@records':
 | |
| # Returns a hash of records where each hash element
 | |
| # - is keyed by the record's serial number
 | |
| # - contains a reference to an unnamed array which holds two array elements:
 | |
| #   - the record's source data
 | |
| #   - the modified date/time stamp converted to epoch seconds
 | |
| sub parseRecs
 | |
| {
 | |
| 	local (@records) = @_;
 | |
| 	%refbaseRecords = ();
 | |
| 
 | |
| 	foreach $record (@records) {
 | |
| 		# extract refbase serial number & UNIX time stamp from this record:
 | |
| 		if ($record =~ /(?<=show\.php\?record=)(\d+)[^\d\n]+(\d{2}) ([[:alpha:]]{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2})/i) {
 | |
| 			$serial = $1;
 | |
| 			$monthday = $2;
 | |
| 			$month = $months{$3}; # 0 == January
 | |
| 			$year = $4 - 1900; # by default, Perl counts years from 1900
 | |
| 			$hour = $5;
 | |
| 			$min = $6;
 | |
| 			$sec = $7;
 | |
| 
 | |
| 			# add modified date to array (after converting to epoch seconds):
 | |
| 			$modifiedDate = timelocal($sec, $min, $hour, $monthday, $month, $year); # uses Time::Local
 | |
| 
 | |
| 			$record =~ s/\s+$//; # remove any trailing whitespace
 | |
| 
 | |
| 			# note that if '@records' contains several records with the same refbase
 | |
| 			# serial number, only the last one will be included in '%refbaseRecords'
 | |
| 			# (and thus only the last one will get updated by the 'update' subroutine)
 | |
| 			push(@{$refbaseRecords{$serial}}, $record);
 | |
| 			push(@{$refbaseRecords{$serial}}, $modifiedDate);
 | |
| 			# NOTE: by storing a reference to an array we can add multiple values per hash key (see Perl Cookbook 5.7)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return %refbaseRecords;
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Sort array of records by cite key:
 | |
| # (note that, currently, only formats 'bibtex', 'endnote', 'ris' 'mods' and 'srw_mods' are supported)
 | |
| sub sortRecs
 | |
| {
 | |
| 	local (@records) = @_;
 | |
| 	%keyedRecordData = ();
 | |
| 	@sortedRecordData = ();
 | |
| 	$i = 1;
 | |
| 
 | |
| 	$fileExtension = "default"; # triggers default regex patterns for extraction of cite IDs
 | |
| 	# use file-specific regex patterns for extraction of cite IDs:
 | |
| 	if ($format =~ /^(bibtex|endnote|ris|mods|srw(_mods)?)$/i) {
 | |
| 		if ($format =~ /^bibtex$/i) {
 | |
| 			$fileExtension = "bib";
 | |
| 		}
 | |
| 		elsif ($format =~ /^endnote$/i) {
 | |
| 			$fileExtension = "enw";
 | |
| 		}
 | |
| 		elsif ($format =~ /^ris$/i) {
 | |
| 			$fileExtension = "ris";
 | |
| 		}
 | |
| 		elsif ($format =~ /^(mods|srw(_mods)?)$/i) {
 | |
| 			$fileExtension = "xml";
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	$citeIDRegex = @{$citeIDPatterns{$fileExtension}}[0]; # get regex pattern that matches the cite IDs in the records
 | |
| 	$citeIDNum   = @{$citeIDPatterns{$fileExtension}}[1]; # get number of the sub-pattern that captures the cite IDs
 | |
| 
 | |
| 	foreach $record (@records) {
 | |
| 		# extract cite IDs and use them as hash keys:
 | |
| 		# (we always append an incrementing number to ensure unique keys; the number
 | |
| 		# is padded with leading zeros in order to allow for correct string sorting)
 | |
| 		if ($record =~ /$citeIDRegex/msg) {
 | |
| 			$citeID = $$citeIDNum . sprintf("-%06d", $i++);
 | |
| 		}
 | |
| 		else { # no cite ID found, so we just use an incrementing number as hash key
 | |
| 			$citeID = sprintf("%06d", $i++);
 | |
| 		}
 | |
| 		$keyedRecordData{$citeID} = $record;
 | |
| 	}
 | |
| 
 | |
| 	@sortedKeys = sort keys %keyedRecordData;
 | |
| 
 | |
| 	foreach $key (@sortedKeys) {
 | |
| 		push(@sortedRecordData, $keyedRecordData{$key});
 | |
| 	}
 | |
| 	
 | |
| 	return @sortedRecordData;
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Remove any duplicate items from '@array':
 | |
| sub uniquify
 | |
| {
 | |
| 	local (@array) = @_;
 | |
| 	local (%unique) = ();
 | |
| 
 | |
| 	foreach $item (@array) {
 | |
| 		$unique{$item}++;
 | |
| 	}
 | |
| 
 | |
| 	return (sort keys %unique);
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Print usage and exit:
 | |
| sub usage
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 
 | |
| 	print "\nrefbase command line client, v" . $version . " by Matthias Steffens, http://cli.refbase.net/\n\n"
 | |
| 
 | |
| 		. "Usage:   refbase [OPTIONS]\n\n"
 | |
| 
 | |
| 		. "Notes:   - At least one query option must be given and unrecognized options will be ignored.\n"
 | |
| 		. "         - If multiple options are given, they will by default be connected with 'AND'. Use\n"
 | |
| 		. "           '--query=or' to connect multiple options with 'OR'.\n"
 | |
| 		. "         - Options syntax: [OPTION]=[VALUE], e.g. '-a=steffens' or '--author=\"steffens, m\"'.\n"
 | |
| 		. "         - Returns up to '--rows' number of records beginning with '--start'. If all given\n"
 | |
| 		. "           query options are empty, all database records will be returned.\n"
 | |
| 		. "         - Note that '--records' assumes a list of full record serials separated by non-digit\n"
 | |
| 		. "           characters while '--serial' allows for partial matches.\n"
 | |
| 		. "         - For each option, default values can be specified at the top of the script.\n"
 | |
| 		. "           Current defaults are given in parentheses.\n\n"
 | |
| 
 | |
| 		. "General Options:   -h, --help        - display this help text\n"
 | |
| 		. "                   -v, --version     - display version information\n"
 | |
| 		. "                   -X, --examples    - display usage examples\n\n"
 | |
| 
 | |
| 		. "Query Options:     -a, --author      - search author field ('" . $params{'author'} . "')\n"
 | |
| 		. "                   -b, --abstract    - search abstract field ('" . $params{'abstract'} . "')\n"
 | |
| 		. "                   -c, --citekey     - search cite_key field, requires '-u, --userid' ('" . $params{'cite_key'} . "')\n"
 | |
| 		. "                   -d, --date        - search by creation date ('" . $params{'date'} . "')\n"
 | |
| 		. "                   -e, --area        - search area field ('" . $params{'area'} . "')\n"
 | |
| 		. "                   -f, --thesis      - search thesis field ('" . $params{'thesis'} . "')\n"
 | |
| 		. "                   -i, --contribid   - search contribution_id field ('" . $params{'contribution_id'} . "')\n"
 | |
| 		. "                   -j, --journal     - search abbrev_journal field ('" . $params{'abbrev_journal'} . "')\n"
 | |
| 		. "                   -k, --keywords    - search keywords field ('" . $params{'keywords'} . "')\n"
 | |
| 		. "                   -l, --location    - search location field ('" . $params{'location'} . "')\n"
 | |
| 		. "                   -m, --marked      - search marked field, requires '-u, --userid' ('" . $params{'ismarked'} . "')\n"
 | |
| 		. "                   -n, --notes       - search notes field ('" . $params{'notes'} . "')\n"
 | |
| 		. "                   -p, --publication - search publication field ('" . $params{'publication'} . "')\n"
 | |
| 		. "                   -q, --query       - query type, possible values: and, or ('" . $params{'queryType'} . "')\n"
 | |
| 		. "                   -r, --records     - search serial field ('" . $params{'records'} . "')\n"
 | |
| 		. "                   -s, --selected    - search selected field, requires '-u, --userid' ('" . $params{'selected'} . "')\n"
 | |
| 		. "                   -t, --title       - search title field ('" . $params{'title'} . "')\n"
 | |
| 		. "                   -u, --userid      - join with user-specific data from user ID ('" . $params{'userID'} . "')\n"
 | |
| 		. "                   -w, --where       - search by using a raw sql where clause ('" . $params{'where'} . "')\n"
 | |
| 		. "                   -x, --type        - search type field ('" . $params{'type'} . "')\n"
 | |
| 		. "                   -y, --year        - search year field ('" . $params{'year'} . "')\n"
 | |
| 		. "                   -z, --serial      - search serial field (partial matches) ('" . $params{'serial'} . "')\n\n"
 | |
| 
 | |
| 		. "Output Options:    -A, --append      - file to which returned records are appended ('" . $params{'appendFile'} . "')\n"
 | |
| 		. "                                       requires '-F, --format': bibtex, mods, srw_mods\n"
 | |
| 		. "                   -B, --update      - update existing records in '-A, --append' file ('" . $params{'updateRecords'} . "')\n"
 | |
| 		. "                                       possible values: 0, 1\n"
 | |
| 		. "                   -C, --style       - citation style ('" . $params{'citeStyle'} . "')\n"
 | |
| 		. "                   -E, --extract     - file from which citation IDs are extracted ('" . $params{'extractFile'} . "')\n"
 | |
| 		. "                                       supported file types: " . join(', ', sort keys(%citeIDPatterns)) . "\n"
 | |
| 		. "                   -F, --format      - output format ('" . $params{'format'} . "')\n"
 | |
| 		. "                                       possible values: html, rtf, pdf, latex, latex_bbl, markdown, ascii,\n"
 | |
| 		. "                                                        ads, bibtex, endnote, isi, ris, atom, mods, oai_dc,\n"
 | |
| 		. "                                                        odf, srw_dc, srw_mods, word\n"
 | |
| 		. "                   -L, --showlinks   - hide/display links column in html output ('" . $params{'showLinks'} . "')\n"
 | |
| 		. "                                       possible values: 0, 1\n"
 | |
| 		. "                   -O, --order       - sort order of returned records ('" . $params{'citeOrder'} . "')\n"
 | |
| 		. "                                       possible values: author, year, type, type-year, creation-date\n"
 | |
| 		. "                   -Q, --showquery   - hide/display SQL query in ASCII output ('" . $params{'showQuery'} . "')\n"
 | |
| 		. "                                       possible values: 0, 1\n"
 | |
| 		. "                   -R, --rows        - number of records to be returned ('" . $params{'showRows'} . "')\n"
 | |
| 		. "                   -S, --start       - number of first record to be returned ('" . $params{'startRecord'} . "')\n"
 | |
| 		. "                   -V, --view        - view type of html output ('" . $params{'viewType'} . "')\n"
 | |
| 		. "                                       possible values: web, print, mobile\n\n"
 | |
| 
 | |
| 		. "Server Options:    -H, --host        - URL of the refbase database ('" . $host . "')\n"
 | |
| 		. "                                       defined shortcuts: " . join(', ', sort keys(%hosts)) . "\n"
 | |
| 		. "                   -P, --password    - password for given '-U, --user' account";
 | |
| 
 | |
| 	if ($loginParams{'loginPassword'} ne '') {
 | |
| 		print "\n                                       (a default pwd has been defined)\n";
 | |
| 	}
 | |
| 	else {
 | |
| 		print " ('')\n";
 | |
| 	}
 | |
| 
 | |
| 	print "                   -U, --user        - login email address of an existing refbase user\n"
 | |
| 		. "                                       ('" . $loginParams{'loginEmail'} . "')\n\n";
 | |
| 
 | |
| 	exit $status;
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Print version number and exit:
 | |
| sub version
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 	print "\nrefbase command line client, version " . $version
 | |
| 		. "\ncheck for updates at http://cli.refbase.net/\n\n";
 | |
| 	exit $status;
 | |
| }
 | |
| 
 | |
| # --------------------------------------------------------------------------------
 | |
| 
 | |
| # Print examples and exit:
 | |
| sub examples
 | |
| {
 | |
| 	local ($status) = @_;
 | |
| 	print <<'END_EXAMPLES';
 | |
| 
 | |
|  --------------------------------------------------------------------------------
 | |
|  REFBASE USAGE EXAMPLES:
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  1) Find all records where the author field contains 'mock' AND the year field
 | |
|     contains '2005':
 | |
|  
 | |
|     refbase -a=mock -y=2005
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  2) Find all records where the author field contains 'mock' OR the title field
 | |
|     contains 'photo', and display 10 records starting with the 21st record in the
 | |
|     result set:
 | |
|  
 | |
|     refbase -a=mock -t=photo -q=or -R=10 -S=21
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  3) Export records with serial numbers '1', '12' and '34' to Endnote format and
 | |
|     save them to a file named 'export.enw':
 | |
|  
 | |
|     refbase -r=1,12,34 -F=endnote > export.enw
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  4) Return up to 50 records that were selected by a user with a user ID '2' in
 | |
|     RTF format using citation style "Ann Glaciol" and sorting them first by
 | |
|     record type, then by year, and save results to a file named 'citations.rtf':
 | |
|  
 | |
|     refbase -s=yes -u=2 -R=50 -F=rtf -C="Ann Glaciol" -O=type-year > citations.rtf
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  5) Find all records which were modified today by a user named "admin" and where
 | |
|     the location field contains 'msteffens' (note the use of the '-w' option to
 | |
|     specify a custom WHERE clause):
 | |
|  
 | |
|     refbase -w='modified_date = CURDATE() AND modified_by RLIKE "admin"' -l=msteffens
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  6) Find all records where the cite_key field (of a user with a user ID '2')
 | |
|     contains 'steffens', and append records in MODS XML format to file 'mods.xml'
 | |
|     if they don't yet exist in that file:
 | |
|  
 | |
|     refbase -u=2 -c=steffens -F=mods -A=mods.xml
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  7) Find all records where the contribution_id field contains 'AWI' and where
 | |
|     the keywords field contains 'seaweeds', and append records in BibTeX format
 | |
|     to file 'paper.bib' if they don't yet exist in that file. In case found
 | |
|     records already exist in file 'paper.bib', update them if their modification
 | |
|     date is more recent:
 | |
|  
 | |
|     refbase -i=AWI -k=seaweeds -F=bibtex -A=paper.bib -B=1
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  8) Extract all citation IDs from file 'paper.aux', and append matching records
 | |
|     (for a user with a user ID '2') in BibTeX format to file 'paper.bib' if they
 | |
|     don't yet exist in that file. In case found records already exist in file
 | |
|     'paper.bib', update them if their modification date is more recent:
 | |
|  
 | |
|     refbase -u=2 -E=paper.aux -F=bibtex -A=paper.bib -B=1
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
|  9) Extract all citation IDs from file 'bibtex.bbl', and save matching records
 | |
|     (for a user with a user ID '2') in LaTeX bibliography (.bbl) format to file
 | |
|     'refbase.bbl' using the "APA" citation style (the .bbl file generated by
 | |
|     refbase can be used as a replacement of the BibTeX-generated .bbl file):
 | |
|  
 | |
|     refbase -u=2 -E=bibtex.bbl -F=latex_bbl -C=APA > refbase.bbl
 | |
|  
 | |
|  --------------------------------------------------------------------------------
 | |
| 
 | |
| END_EXAMPLES
 | |
| 	exit $status;
 | |
| }
 | |
| 
 | |
| __END__
 |