cb2aa8cec55d9c33aeacb1202c56a32aecc2d6af
[debian/jabref.git] / src / java / net / sf / jabref / imports / EntryFromPDFCreator.java
1 package net.sf.jabref.imports;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.text.SimpleDateFormat;
8 import java.util.ArrayList;
9 import java.util.Calendar;
10 import java.util.List;
11 import java.util.logging.Level;
12 import java.util.logging.Logger;
13
14 import org.apache.pdfbox.pdmodel.PDDocument;
15 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
16
17 import net.sf.jabref.BibtexEntry;
18 import net.sf.jabref.Globals;
19 import net.sf.jabref.JabRefPreferences;
20 import net.sf.jabref.OutputPrinterToNull;
21 import net.sf.jabref.external.ExternalFileType;
22 import net.sf.jabref.util.EncryptionNotSupportedException;
23 import net.sf.jabref.util.XMPUtil;
24
25 /**
26  * Uses XMPUtils to get one BibtexEntry for a PDF-File. 
27  * Also imports the non-XMP Data (PDDocument-Information) using XMPUtil.getBibtexEntryFromDocumentInformation.
28  * If data from more than one entry is read by XMPUtil then this entys are merged into one.  
29  * @author Dan
30  * @version 12.11.2008 | 22:12:48
31  * 
32  */
33 public class EntryFromPDFCreator extends EntryFromFileCreator {
34
35         private static Logger logger = Logger.getLogger(EntryFromPDFCreator.class.getName());
36         
37         public EntryFromPDFCreator() {
38                 super(getPDFExternalFileType());
39         }
40         
41         private static ExternalFileType getPDFExternalFileType(){
42                 ExternalFileType pdfFileType = JabRefPreferences.getInstance().getExternalFileTypeByExt("pdf");
43                 if (pdfFileType==null){
44                         return new ExternalFileType("PDF", "pdf", "application/pdf", "evince", "pdfSmall");
45                 }
46                 return pdfFileType;
47         }
48
49         /*
50          * (non-Javadoc)
51          * 
52          * @see net.sf.jabref.imports.EntryFromFileCreator#accept(java.io.File)
53          * 
54          * Accepts all Files having as suffix ".PDF" (in ignore case mode).
55          */
56         @Override
57         public boolean accept(File f) {
58                 return f != null && f.getName().toUpperCase().endsWith(".PDF");
59         }
60
61         @Override
62         protected BibtexEntry createBibtexEntry(File pdfFile) {
63
64                 if (!accept(pdfFile)) {
65                         return null;
66                 }
67
68                 BibtexEntry entry = new BibtexEntry();
69
70                 // Read pdf specific metadata
71                 // use PdfContentImporter
72                 PdfContentImporter pci = new PdfContentImporter();
73                 try {
74                         ArrayList<BibtexEntry> list =  (ArrayList<BibtexEntry>) pci.importEntries(new FileInputStream(pdfFile), new OutputPrinterToNull());
75                         // there should only be one entry in the arraylist
76                         if(list != null && !list.isEmpty()) {
77                                 return list.iterator().next();
78                         }
79                 } catch (FileNotFoundException e) {
80                     logger.log(Level.SEVERE, "File not found", e);
81                 } catch (IOException e) {
82             logger.log(Level.SEVERE, "Error opening file", e);
83                 }
84                 
85                 return null;
86                 
87                 /*addEntryDataFromPDDocumentInformation(pdfFile, entry);
88                 addEntyDataFromXMP(pdfFile, entry);
89
90                 if (entry.getField("title") == null) {
91                         entry.setField("title", pdfFile.getName());
92                 }
93
94                 return entry;*/
95         }
96
97         /** Adds entry data read from the PDDocument information of the file.
98          * @param pdfFile
99          * @param entry
100          */
101         private void addEntryDataFromPDDocumentInformation(File pdfFile, BibtexEntry entry) {
102                 PDDocument document = null;
103                 try {
104                         document = PDDocument.load(pdfFile.getAbsoluteFile());
105                         PDDocumentInformation pdfDocInfo = document
106                                         .getDocumentInformation();
107                         
108                         if (pdfDocInfo!=null){
109                                 BibtexEntry entryDI = XMPUtil.getBibtexEntryFromDocumentInformation(document
110                                                 .getDocumentInformation());
111                                 if (entryDI!=null){
112                                         addEntryDataToEntry(entry,entryDI);
113                                         Calendar creationDate = pdfDocInfo.getCreationDate();
114                                         if (creationDate != null) {
115                                                 String date = new SimpleDateFormat("yyyy.MM.dd")
116                                                                 .format(creationDate.getTime());
117                                                 appendToField(entry, "timestamp", date.toString());
118                                         }
119                 
120                                         if (pdfDocInfo.getCustomMetadataValue("bibtex/bibtexkey") != null){
121                                                 entry.setId(pdfDocInfo
122                                                                 .getCustomMetadataValue("bibtex/bibtexkey"));
123                                         }
124                                 }
125                         }
126                 } catch (IOException e) {
127                         // no canceling here, just no data added.
128                 } finally {
129                         if (document != null) {
130                                 try {
131                                         document.close();
132                                 } catch (IOException e) {
133                                         // no canceling here, just no data added.
134                                 }
135                         }
136                 }
137         }
138
139         /**
140          * Adds all data Found in all the entrys of this XMP file to the given
141          * entry. This was implemented without having much knowledge of the XMP
142          * format.
143          * 
144          * @param aFile
145          * @param entry
146          */
147         private void addEntyDataFromXMP(File aFile, BibtexEntry entry) {
148                 try {
149                         List<BibtexEntry> entrys = XMPUtil.readXMP(aFile.getAbsoluteFile());
150                         addEntrysToEntry(entry, entrys);
151                 } catch (EncryptionNotSupportedException e) {
152                         // no canceling here, just no data added.
153                 } catch (IOException e) {
154                         // no canceling here, just no data added.
155                 }
156         }
157
158         @Override
159         public String getFormatName() {
160                 return "PDF";
161         }
162
163 }