1 package net.sf.jabref.imports;
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
7 import java.io.UnsupportedEncodingException;
8 import java.net.HttpURLConnection;
10 import java.net.URLEncoder;
11 import java.util.Date;
13 import javax.swing.JOptionPane;
14 import javax.swing.JPanel;
15 import javax.xml.parsers.ParserConfigurationException;
16 import javax.xml.parsers.SAXParser;
17 import javax.xml.parsers.SAXParserFactory;
19 import net.sf.jabref.BibtexEntry;
20 import net.sf.jabref.BibtexEntryType;
21 import net.sf.jabref.GUIGlobals;
22 import net.sf.jabref.Globals;
23 import net.sf.jabref.JabRefFrame;
24 import net.sf.jabref.Util;
25 import net.sf.jabref.gui.ImportInspectionDialog;
27 import org.xml.sax.SAXException;
28 import org.xml.sax.helpers.DefaultHandler;
32 * This class can be used to access any archive offering an OAI2 interface. By
33 * default it will access ArXiv.org
35 * @author Ulrich Stärk
36 * @author Christian Kopf
38 * @version $Revision: 1970 $ ($Date: 2007-03-10 20:22:06 +0100 (Sat, 10 Mar 2007) $)
41 public class OAI2Fetcher implements EntryFetcher, Runnable {
43 public static final String OAI2_ARXIV_PREFIXIDENTIFIER = "oai%3AarXiv.org%3A";
45 public static final String OAI2_ARXIV_HOST = "arxiv.org";
47 public static final String OAI2_ARXIV_SCRIPT = "oai2";
49 public static final String OAI2_ARXIV_METADATAPREFIX = "arXiv";
51 public static final String OAI2_ARXIV_ARCHIVENAME = "ArXiv.org";
53 public static final String OAI2_IDENTIFIER_FIELD = "oai2identifier";
55 private SAXParserFactory parserFactory;
57 private SAXParser saxParser;
59 private String oai2Host;
61 private String oai2Script;
63 private String oai2MetaDataPrefix;
65 private String oai2PrefixIdentifier;
67 private String oai2ArchiveName;
69 private boolean shouldContinue = true;
73 private ImportInspectionDialog dialog;
75 private JabRefFrame frame;
77 /* some archives - like arxive.org - might expect of you to wait some time */
79 private boolean shouldWait() {
83 private long waitTime = -1;
85 private Date lastCall;
91 * the host to query without leading http:// and without trailing /
93 * the relative location of the oai2 interface without leading
95 * @param oai2Metadataprefix
96 * the urlencoded metadataprefix
97 * @param oai2Prefixidentifier
98 * the urlencoded prefix identifier
100 * Time to wait in milliseconds between query-requests.
102 public OAI2Fetcher(String oai2Host, String oai2Script, String oai2Metadataprefix,
103 String oai2Prefixidentifier, String oai2ArchiveName, long waitTimeMs) {
104 this.oai2Host = oai2Host;
105 this.oai2Script = oai2Script;
106 this.oai2MetaDataPrefix = oai2Metadataprefix;
107 this.oai2PrefixIdentifier = oai2Prefixidentifier;
108 this.oai2ArchiveName = oai2ArchiveName;
109 this.waitTime = waitTimeMs;
111 parserFactory = SAXParserFactory.newInstance();
112 saxParser = parserFactory.newSAXParser();
113 } catch (ParserConfigurationException e) {
115 } catch (SAXException e) {
121 * Default Constructor. The archive queried will be ArXiv.org
124 public OAI2Fetcher() {
125 this(OAI2_ARXIV_HOST, OAI2_ARXIV_SCRIPT, OAI2_ARXIV_METADATAPREFIX,
126 OAI2_ARXIV_PREFIXIDENTIFIER, OAI2_ARXIV_ARCHIVENAME, 20000L);
130 * Construct the query URL
133 * The key of the OAI2 entry that the url should poitn to.
135 * @return a String denoting the query URL
137 public String constructUrl(String key) {
138 String identifier = "";
140 identifier = URLEncoder.encode((String) key, "UTF-8");
141 } catch (UnsupportedEncodingException e) {
144 StringBuffer sb = new StringBuffer("http://").append(oai2Host).append("/");
145 sb.append(oai2Script).append("?");
146 sb.append("verb=GetRecord");
147 sb.append("&identifier=");
148 sb.append(oai2PrefixIdentifier);
149 sb.append(identifier);
150 sb.append("&metadataPrefix=").append(oai2MetaDataPrefix);
151 return sb.toString();
155 * Strip subccategories from ArXiv key.
157 * @param key The key to fix.
160 public static String fixKey(String key){
161 int dot = key.indexOf('.');
162 int slash = key.indexOf('/');
164 if (dot > -1 && dot < slash)
165 key = key.substring(0, dot) + key.substring(slash, key.length());
170 public static String correctLineBreaks(String s){
171 s = s.replaceAll("\\n(?!\\s*\\n)", " ");
172 s = s.replaceAll("\\s*\\n\\s*", "\n");
173 return s.replaceAll(" {2,}", " ").replaceAll("(^\\s*|\\s+$)", "");
177 * Import an entry from an OAI2 archive. The BibtexEntry provided has to
178 * have the field OAI2_IDENTIFIER_FIELD set to the search string.
181 * The OAI2 key to fetch from ArXiv.
182 * @return The imnported BibtexEntry or null if none.
184 public BibtexEntry importOai2Entry(String key) {
186 * Fix for problem reported in mailing-list:
187 * https://sourceforge.net/forum/message.php?msg_id=4087158
191 String url = constructUrl(key);
193 URL oai2Url = new URL(url);
194 HttpURLConnection oai2Connection = (HttpURLConnection) oai2Url.openConnection();
195 oai2Connection.setRequestProperty("User-Agent", "Jabref");
196 InputStream inputStream = oai2Connection.getInputStream();
198 /* create an empty BibtexEntry and set the oai2identifier field */
199 BibtexEntry be = new BibtexEntry(Util.createNeutralId(), BibtexEntryType.ARTICLE);
200 be.setField(OAI2_IDENTIFIER_FIELD, key);
201 DefaultHandler handlerBase = new OAI2Handler(be);
202 /* parse the result */
203 saxParser.parse(inputStream, handlerBase);
205 /* Correct line breaks and spacing */
206 Object[] fields = be.getAllFields();
207 for (int i = 0; i < fields.length; i++){
208 String name = fields[i].toString();
210 be.setField(name, correctLineBreaks(be.getField(name).toString()));
213 } catch (IOException e) {
214 JOptionPane.showMessageDialog(frame, Globals.lang(
215 "An Exception ocurred while accessing '%0'", url)
216 + "\n\n" + e.toString(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
217 } catch (SAXException e) {
218 JOptionPane.showMessageDialog(frame, Globals.lang(
219 "An SAXException ocurred while parsing '%0':", new String[]{url})
220 + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
221 } catch (RuntimeException e){
222 JOptionPane.showMessageDialog(frame, Globals.lang(
223 "An Error occurred while fetching from OAI2 source (%0):", new String[]{url})
224 + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
229 public String getHelpPage() {
230 // there is no helppage
234 public URL getIcon() {
235 return GUIGlobals.getIconUrl("www");
238 public String getKeyName() {
239 return "Fetch " + oai2ArchiveName;
242 public JPanel getOptionsPanel() {
243 // we have no additional options
247 public String getTitle() {
248 return Globals.menuTitle(getKeyName());
251 public void processQuery(String query, ImportInspectionDialog dialog, JabRefFrame frame) {
253 this.dialog = dialog;
255 (new Thread(this)).start();
258 public void cancelled() {
259 shouldContinue = false;
262 public void done(int entriesImported) {
266 public void stopFetching() {
267 shouldContinue = false;
272 dialog.setVisible(true);
273 shouldContinue = true;
274 /* multiple keys can be delimited by ; or space */
275 query = query.replaceAll(" ", ";");
276 String[] keys = query.split(";");
277 for (int i = 0; i < keys.length; i++) {
278 String key = keys[i];
280 * some archives - like arxive.org - might expect of you to wait
283 if (shouldWait() && lastCall != null) {
285 long elapsed = new Date().getTime() - lastCall.getTime();
287 while (elapsed < waitTime) {
288 frame.output(Globals.lang("Waiting for ArXiv...") + ((waitTime - elapsed) / 1000) + " s");
290 elapsed = new Date().getTime() - lastCall.getTime();
294 frame.output(Globals.lang("Processing ") + key);
296 /* the cancel button has been hit */
300 /* query the archive and load the results into the BibtexEntry */
301 BibtexEntry be = importOai2Entry(key);
304 lastCall = new Date();
306 /* add the entry to the inspection dialog */
310 /* update the dialogs progress bar */
311 dialog.setProgress(i + 1, keys.length);
313 /* inform the inspection dialog, that we're done */
314 dialog.entryListComplete();
316 } catch (Exception e) {
317 frame.output(Globals.lang("Error while fetching from OIA2: ") + e.getMessage());