18a489b0529f5a3e086a300a2e5425459a321e40
[debian/jabref.git] / src / java / net / sf / jabref / imports / MedlineFetcher.java
1 package net.sf.jabref.imports;
2
3 import java.util.ArrayList;
4 import java.net.*;
5 import javax.swing.*;
6 import java.awt.*;
7 import java.awt.event.ActionListener;
8 import javax.xml.parsers.SAXParserFactory;
9 import javax.xml.parsers.SAXParser;
10 import java.awt.event.ActionEvent;
11 import java.awt.event.FocusAdapter;
12 import java.awt.event.FocusEvent;
13 import java.util.Iterator;
14 import java.util.regex.Pattern;
15 import java.util.regex.Matcher;
16 import net.sf.jabref.*;
17 import net.sf.jabref.undo.NamedCompound;
18 import net.sf.jabref.undo.UndoableInsertEntry;
19 import java.io.*;
20 import net.sf.jabref.HelpAction;
21 import net.sf.jabref.gui.ImportInspectionDialog;
22
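/**
 * Side pane component that fetches entries from Medline (PubMed) through the
 * NCBI Entrez eutils web service, either by a list of Medline IDs or by a
 * search term typed into its text field.
 *
 * <p>Copyright: Copyright (c) 2003</p>
 * @author not attributable
 * @version 1.0
 */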
31
32 public class MedlineFetcher extends SidePaneComponent implements Runnable,
33         ImportInspectionDialog.CallBack {
34
35     /**@class SearchResult
36      *        nested class.
37      */
38     public class SearchResult {
39         public int count;
40         public int retmax;
41         public int retstart;
42         public String ids = "";
43     public ArrayList idList = new ArrayList();
44         public SearchResult()
45             {
46                 count = 0;
47                 retmax = 0;
48                 retstart = 0;
49             }
50
51         public void addID(String id)
52         {
53
54         idList.add(id);
55                 if(!ids.equals(""))
56                     ids += ","+id;
57                 else
58                     ids = id;
59             }
60     }
61     final int PACING = 20;
62     final int MAX_TO_FETCH = 10;
63     boolean keepOn = true;
64     String idList;
65     JTextField tf = new JTextField();
66     JPanel pan = new JPanel();
67     GridBagLayout gbl = new GridBagLayout();
68     GridBagConstraints con = new GridBagConstraints();
69     MedlineFetcher ths = this;
70     AuthorDialog authorDialog;
71     JFrame jFrame; // invisible dialog holder
72     JButton go = new JButton(Globals.lang("Fetch")),
73         helpBut = new JButton(GUIGlobals.getImage("helpSmall"));
74     HelpAction help;
75
76     public MedlineFetcher(SidePaneManager p0) {
77         super(p0, GUIGlobals.getIconUrl("medline"), Globals.lang("Fetch Medline"));
78
79         help = new HelpAction(Globals.helpDiag, GUIGlobals.medlineHelp, "Help");
80         helpBut.addActionListener(help);
81         helpBut.setMargin(new Insets(0,0,0,0));
82         tf.setPreferredSize(new Dimension(1,tf.getPreferredSize().height));
83         //add(hd, BorderLayout.NORTH);
84         //ok.setToolTipText(Globals.lang("Fetch Medline"));
85         JPanel main = new JPanel();
86             main.setLayout(gbl);
87         con.fill = GridBagConstraints.BOTH;
88         //con.insets = new Insets(0, 0, 2,  0);
89         con.gridwidth = GridBagConstraints.REMAINDER;
90         con.weightx = 1;
91         con.weighty = 1;
92         con.fill = GridBagConstraints.BOTH;
93         gbl.setConstraints(tf, con);
94         main.add(tf);
95         con.weighty = 0;
96         con.gridwidth = 1;
97         gbl.setConstraints(go, con);
98         main.add(go);
99         con.gridwidth = GridBagConstraints.REMAINDER;
100         gbl.setConstraints(helpBut, con);
101         main.add(helpBut);
102         ActionListener listener = new ActionListener() {
103                 public void actionPerformed(ActionEvent e) {
104                     (new Thread(ths)).start(); // Run fetch in thread.
105                 }
106             };
107         main.setBorder(BorderFactory.createEmptyBorder(1,1,1,1));
108         add(main, BorderLayout.CENTER);
109         go.addActionListener(listener);
110         tf.addActionListener(listener);
111         tf.addFocusListener(new FocusAdapter() {
112             public void focusGained(FocusEvent event) {
113                 if (!event.isTemporary() && (tf.getText().length()>0)) {
114                     tf.selectAll();
115                 }
116             }
117         });
118     }
119
120     public JTextField getTextField() {
121         return tf;
122     }
123
124     public void fetchById() {
125         //if(idList==null || idList.trim().equals(""))//if user pressed cancel
126         //  return;
127         Pattern p = Pattern.compile("\\d+[,\\d+]*");
128         //System.out.println(""+p+"\t"+idList);
129         Matcher m = p.matcher( idList );
130         if ( m.matches() ) {
131             panel.frame().output(Globals.lang("Fetching Medline by ID..."));
132
133             ArrayList bibs = fetchMedline(idList);
134             if ((bibs != null) && (bibs.size() > 0)) {
135                 //if (panel.prefs().getBoolean("useOwner")) {
136                 //    Util.setDefaultOwner(bibs, panel.prefs().get("defaultOwner"));
137                 //}
138                 tf.setText("");
139                 /*NamedCompound ce = new NamedCompound("fetch Medline");
140                 Iterator i = bibs.iterator();
141                 while (i.hasNext()) {
142                     try {
143                         BibtexEntry be = (BibtexEntry) i.next();
144                         String id = Util.createId(be.getType(), panel.database());
145                         be.setId(id);
146                         entries.add(be);
147                         //panel.database().insertEntry(be);
148                         //ce.addEdit(new UndoableInsertEntry(panel.database(), be, panel));
149                     }
150                     catch (KeyCollisionException ex) {
151                     }
152                     }*/
153                 //ce.end();
154
155         panel.frame().addImportedEntries(panel, bibs, null, false, this);
156
157         /*
158                 int importedEntries = panel.frame().addBibEntries(bibs, null, false);
159         if (importedEntries == 0) {
160             return; // Nothing to refresh!
161         }
162         panel.markBaseChanged();
163                 panel.refreshTable();
164         if (bibs.size() > 0) {
165             BibtexEntry[] entries = (BibtexEntry[])bibs.toArray(new BibtexEntry[0]);
166             panel.selectEntries(entries, 0);
167             if (entries.length == 1)
168                 panel.showEntry(entries[0]);
169             //else
170             //    panel.updateViewToSelected();
171         }*/
172
173                 //panel.undoManager.addEdit(ce);
174             } else
175                 panel.output(Globals.lang("No Medline entries found."));
176         } else {
177             JOptionPane.showMessageDialog(panel.frame(),Globals.lang("Please enter a semicolon or comma separated list of Medline IDs (numbers)."),Globals.lang("Input error"),JOptionPane.ERROR_MESSAGE);
178         }
179     }
180
181
182
183 //==================================================
184 //
185 //==================================================
186   public static ArrayList fetchMedline(String id)
187   {
188     ArrayList bibItems=null;
189     try {
190
191       String baseUrl = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&rettype=citation&id=" + id;
192
193       URL url = new URL( baseUrl );
194       HttpURLConnection data = (HttpURLConnection)url.openConnection();
195
196
197        /* Reader un = new InputStreamReader(data.getInputStream());
198         int c;
199         while ((c=un.read()) != -1) {
200           System.out.print((char)c);
201         }*/
202
203
204         // Obtain a factory object for creating SAX parsers
205         SAXParserFactory parserFactory = SAXParserFactory.newInstance();
206         // Configure the factory object to specify attributes of the parsers it creates
207         parserFactory.setValidating(true);
208         parserFactory.setNamespaceAware(true);
209
210         // Now create a SAXParser object
211         SAXParser parser = parserFactory.newSAXParser();   //May throw exceptions
212         MedlineHandler handler = new MedlineHandler();
213         // Start the parser. It reads the file and calls methods of the handler.
214
215         parser.parse( data.getInputStream(), handler);
216         /*FileOutputStream out = new FileOutputStream(new File("/home/alver/ut.txt"));
217         System.out.println("#####");
218         InputStream is = data.getInputStream();
219         int c;
220         while ((c = is.read()) != -1) {
221             out.write((char)c);
222         }
223         System.out.println("#####");
224         out.close();*/
225         // When you're done, report the results stored by your handler object
226         bibItems = handler.getItems();
227
228     }
229     catch(javax.xml.parsers.ParserConfigurationException e1){}
230     catch(org.xml.sax.SAXException e2){}
231     catch(java.io.IOException e3){}
232     return bibItems;
233 }
234
235    public void run() {
236
237         idList = tf.getText().replace(';', ',');
238
239         //if(idList==null || idList.trim().equals(""))//if user pressed cancel
240         //    return;
241         Pattern p1 = Pattern.compile("\\d+[,\\d+]*"),
242             p2 = Pattern.compile(".+[,.+]*");
243
244          Matcher m1 = p1.matcher( idList ),
245              m2 = p2.matcher( idList );
246          if ( m1.matches() ) {
247              panel.frame().output(Globals.lang("Fetching Medline by id ..."));
248              idList = tf.getText().replace(';', ',');
249              fetchById();
250              //System.out.println("Fetch by id");
251          }
252          else if ( m2.matches() ) {
253             panel.frame().output(Globals.lang("Fetching Medline by term ..."));
254
255             // my stuff
256             //---------------------------
257             String searchTerm = setupTerm(idList); // fix the syntax
258             SearchResult result = getIds(searchTerm ,0,1); // get the ids from entrez
259             // prompt the user to number articles to retrieve
260             if (result.count == 0) {
261                 JOptionPane.showMessageDialog(panel.frame(), Globals.lang("No references found"));
262                 return;
263             }
264             String question =
265                 Globals.lang("References found")+": "
266                 + Integer.toString(result.count)+"  "
267                 + Globals.lang("Number of references to fetch?");
268             String strCount =
269                 JOptionPane.showInputDialog(question,
270                                             Integer.toString(result.count));
271
272             // for strCount ...
273             if((strCount == null) || strCount.equals(""))
274                 return;
275             int count;
276         try {
277             count = Integer.parseInt(strCount);
278         } catch (NumberFormatException ex) {
279             panel.output("");
280             return;
281         }
282
283         ImportInspectionDialog diag = new ImportInspectionDialog(panel.frame(), panel,
284                 BibtexFields.DEFAULT_INSPECTION_FIELDS, Globals.lang("Fetch Medline"), false);
285         Util.placeDialog(diag, panel.frame());
286          diag.setDefaultSelected(false); // Make sure new entries are not selected by default.
287
288              // diag.setProgress(0, count);
289         diag.setVisible(true);
290         keepOn = true;
291          diag.addCallBack(new ImportInspectionDialog.CallBack() {
292              public void done(int entriesImported) {
293                  if (entriesImported > 0) {
294                  panel.output(Globals.lang("Medline entries fetched")+": "+entriesImported);
295                  panel.markBaseChanged();
296              } else
297                  panel.output(Globals.lang("No Medline entries found."));
298             }
299
300              public void cancelled() {
301                  panel.output(Globals.lang("%0 import cancelled.", "Medline"));
302              }
303
304
305              public void stopFetching() {
306                 // Make sure the fetch loop exits at next iteration.
307                 keepOn = false;
308              }
309          });
310             for (int jj = 0; jj < count; jj+=PACING) {
311             if (!keepOn)
312                 break;
313                     // get the ids from entrez
314                     result = getIds(searchTerm,jj,PACING);
315
316             /*String[] test = getTitles((String[])result.idList.toArray(new String[0]));
317             for (int pelle=0; pelle<test.length; pelle++) {
318                 System.out.println(": "+test[pelle]);
319             } */
320
321             final ArrayList bibs = fetchMedline(result.ids);
322             if (!keepOn)
323                 break;
324             diag.addEntries(bibs);
325             diag.setProgress(jj+PACING, count);
326             }
327          diag.entryListComplete();
328          }
329    }
330     public String setupTerm(String in){
331         Pattern part1=Pattern.compile(", ");
332         Pattern part2=Pattern.compile(",");
333         Pattern part3=Pattern.compile(" ");
334         Matcher matcher;
335         matcher=part1.matcher(in);
336         in=matcher.replaceAll("\\+AND\\+");
337         matcher=part2.matcher(in);
338         in=matcher.replaceAll("\\+AND\\+");
339         matcher=part3.matcher(in);
340         in=matcher.replaceAll("+");
341
342         return in;
343     }
344
345     // this gets the initial list of ids
346     public SearchResult getIds(String term, int start,int pacing){
347         String baseUrl="http://eutils.ncbi.nlm.nih.gov/entrez/eutils";
348         String medlineUrl = baseUrl
349             +"/esearch.fcgi?db=pubmed&retmax="
350             +Integer.toString(pacing)
351             +"&retstart="+Integer.toString(start)
352             +"&term=";
353         Pattern idPattern=Pattern.compile("<Id>(\\d+)</Id>");
354         Pattern countPattern=Pattern.compile("<Count>(\\d+)<\\/Count>");
355         Pattern retMaxPattern=Pattern.compile("<RetMax>(\\d+)<\\/RetMax>");
356         Pattern retStartPattern=Pattern.compile("<RetStart>(\\d+)<\\/RetStart>");
357         Matcher idMatcher;
358         Matcher countMatcher;
359         Matcher retMaxMatcher;
360         Matcher retStartMatcher;
361         boolean doCount = true;
362         SearchResult result = new SearchResult();
363         //System.out.println(medlineUrl+term);
364         try{
365             URL ncbi = new URL(medlineUrl+term);
366             // get the ids
367             HttpURLConnection ncbiCon=(HttpURLConnection)ncbi.openConnection();
368             BufferedReader in =
369                 new BufferedReader
370                 (new InputStreamReader
371                  ( ncbi.openStream()));
372             String inLine;
373             while ((inLine=in.readLine())!=null){
374
375                 // get the count
376                 idMatcher=idPattern.matcher(inLine);
377                 if (idMatcher.find()){
378                     result.addID(idMatcher.group(1));
379                 }
380                 retMaxMatcher=retMaxPattern.matcher(inLine);
381                 if (idMatcher.find()){
382                     result.retmax=Integer.parseInt(retMaxMatcher.group(1));
383                 }
384                 retStartMatcher=retStartPattern.matcher(inLine);
385                 if (retStartMatcher.find()){
386                     result.retstart=Integer.parseInt(retStartMatcher.group(1));
387                 }
388                 countMatcher=countPattern.matcher(inLine);
389                 if (doCount && countMatcher.find()){
390                     result.count=Integer.parseInt(countMatcher.group(1));
391                     doCount = false;
392                 }
393             }
394
395         }
396         catch (MalformedURLException e) {     // new URL() failed
397             System.out.println("bad url");
398             e.printStackTrace();
399         }
400         catch (IOException e) {               // openConnection() failed
401             System.out.println("connection failed");
402             e.printStackTrace();
403
404         }
405         return result;
406     }
407
408     public String[] getTitles(String[] idArrayList) {
409       String[] titles = new String[Math.min(MAX_TO_FETCH, idArrayList.length)];
410         String temp;
411         for (int i=0; i<Math.min(MAX_TO_FETCH, idArrayList.length); i++){
412             temp=getOneCitation(idArrayList[i]);
413             titles[i]=getVitalData(temp);
414         }
415         return titles;
416     }
417
418         // get the xml for an entry
419     public String getOneCitation(String id){
420         String baseUrl="http://eutils.ncbi.nlm.nih.gov/entrez/eutils";
421         String retrieveUrl = baseUrl+"/efetch.fcgi?db=pubmed&retmode=xml&rettype=citation&id=";
422         StringBuffer sb=new StringBuffer();
423         try{
424             URL ncbi = new URL(retrieveUrl+id);
425             HttpURLConnection ncbiCon=(HttpURLConnection)ncbi.openConnection();
426             BufferedReader in =
427                 new BufferedReader
428                 (new InputStreamReader
429                  ( ncbi.openStream()));
430             String inLine;
431             while ((inLine=in.readLine())!=null){
432
433                 sb.append(inLine);
434             }
435
436         }
437         catch (MalformedURLException e) {     // new URL() failed
438             System.out.println("bad url");
439             e.printStackTrace();
440         }
441         catch (IOException e) {               // openConnection() failed
442             System.out.println("connection failed");
443             e.printStackTrace();
444
445         }
446         return sb.toString();
447     }
448
449         // parse out the titles from the xml
450     public String getVitalData(String sb){
451         StringBuffer result=new StringBuffer();
452         Pattern articleTitle=Pattern.compile("<ArticleTitle>(.+)</ArticleTitle>");
453         Pattern authorName=Pattern.compile("<Author>(.+)</Author>");
454         Matcher matcher;
455         matcher=articleTitle.matcher(sb);
456         if (matcher.find())
457         result.append("Title: ").append(matcher.group(1));
458
459         //matcher=authorName.matcher(sb);
460         //while (matcher.find())
461         //   result.append("\tAuthor: "+matcher.group(1));
462         return result.toString();
463     }
464
465     // This method is called by the dialog when the user has selected the
466     // wanted entries, and clicked Ok. The callback object can update status
467     // line etc.
468     public void done(int entriesImported) {
469         panel.output(Globals.lang("Medline entries fetched")+": "+entriesImported);
470     }
471
472     public void cancelled() {
473         panel.output(Globals.lang("%0 import cancelled.", "Medline"));
474     }
475
476
477     // This method is called by the dialog when the user has cancelled or
478     // signalled a stop. It is expected that any long-running fetch operations
479     // will stop after this method is called.
480     public void stopFetching() {
481         //To change body of implemented methods use File | Settings | File Templates.
482     }
483
484 }