2 * Created on 13-Dec-2003
4 package net.sf.jabref.labelPattern;
6 import java.util.ArrayList;
7 import java.util.StringTokenizer;
9 import net.sf.jabref.*;
10 import net.sf.jabref.export.layout.format.RemoveLatexCommands;
14 * @author Ulrik Stervbo (ulriks AT ruc.dk)
17 * This is the utility class of the LabelPattern package.
18 * @author Ulrik Stervbo (ulriks AT ruc.dk)
20 public class LabelPatternUtil {
// All single characters that we can use for extending a key to make it unique:
private static final String CHARS = "abcdefghijklmnopqrstuvwxyz";

// Tokenized default key pattern in the list layout produced by split();
// (re)loaded from the preferences by updateDefaultPattern().
public static ArrayList DEFAULT_LABELPATTERN;
static {
    updateDefaultPattern();
}

// Database queried by makeLabel and isLabelUnique when looking for key clashes.
private static BibtexDatabase _db;
33 public static void updateDefaultPattern() {
34 DEFAULT_LABELPATTERN = split(Globals.prefs.get("defaultLabelPattern"));
38 * This method takes a string of the form [field1]spacer[field2]spacer[field3]...,
39 * where the fields are the (required) fields of a BibTex entry. The string is split
40 * into firlds and spacers by recognizing the [ and ].
42 * @param labelPattern a <code>String</code>
43 * @return an <code>ArrayList</code> The first item of the list
44 * is a string representation of the key pattern (the parameter),
45 * the second item is the spacer character (a <code>String</code>).
47 public static ArrayList split(String labelPattern) {
48 // A holder for fields of the entry to be used for the key
49 ArrayList _alist = new ArrayList();
51 // Before we do anything, we add the parameter to the ArrayLIst
52 _alist.add(labelPattern);
54 //String[] ss = labelPattern.split("\\[|\\]");
55 StringTokenizer tok = new StringTokenizer(labelPattern, "[]", true);
56 while (tok.hasMoreTokens()) {
57 _alist.add(tok.nextToken());
63 // Regular expresion for identifying the fields
64 Pattern pi = Pattern.compile("\\[\\w*\\]");
65 // Regular expresion for identifying the spacer
66 Pattern ps = Pattern.compile("\\].()*\\[");
68 // The matcher for the field
69 Matcher mi = pi.matcher(labelPattern);
70 // The matcher for the spacer char
71 Matcher ms = ps.matcher(labelPattern);
73 // Before we do anything, we add the parameter to the ArrayLIst
74 _alist.add(labelPattern);
76 // If we can find the spacer character
78 String t_spacer = ms.group();
79 // Remove the `]' and `[' at the ends
80 // We cant imagine a spacer of omre than one character.
81 t_spacer = t_spacer.substring(1,2);
86 // Get the matched string
87 String t_str = mi.group();
89 int _eindex = t_str.length() -1;
90 // Remove the `[' and `]' at the ends
91 t_str = t_str.substring(_sindex, _eindex);
99 * Generates a BibTeX label according to the pattern for a given entry type, and
100 * returns the <code>Bibtexentry</code> with the unique label.
101 * @param table a <code>LabelPattern</code>
102 * @param database a <code>BibtexDatabase</code>
103 * @param _entry a <code>BibtexEntry</code>
104 * @return modified Bibtexentry
// Expands the key pattern registered in `table` for the entry's (lower-cased) type name into
// a key string, cleans it with Util.checkLegalKey and the optional "KeyPatternRegex"
// preference, and gives the entry a key that _db does not report as already used,
// appending a suffix from getAddition(...) when the plain key is taken.
// NOTE(review): this copy of the method is missing a number of short lines (block openers
// such as try/else, closing braces, a few one-statement lines). The notes below describe
// only what the visible lines establish; anything else is marked as an assumption.
106 public static BibtexEntry makeLabel(LabelPattern table,
107 BibtexDatabase database,
108 BibtexEntry _entry) {
111 String _spacer, _label;
112 StringBuffer _sb = new StringBuffer();
113 boolean forceUpper = false, forceLower = false;
116 // get the type of entry
117 String _type = _entry.getType().getName().toLowerCase();
118 // Get the arrayList corrosponding to the type
119 _al = table.getValue(_type);
120 int _alSize = _al.size();
121 boolean field = false;
// Iteration starts at index 1. Presumably the list has the layout produced by split(),
// where item 0 is the raw pattern string - TODO confirm against LabelPattern.getValue.
122 for (int i = 1; i < _alSize; i++) {
123 String val = _al.get(i).toString();
// "[" and "]" tokens delimit a field marker (the statements updating `field` are not
// visible in this copy).
124 if (val.equals("[")) {
127 else if (val.equals("]")) {
131 /* Edited by Seb Wills <saw27@mrao.cam.ac.uk> on 13-Apr-2004
132 Added new pseudo-fields "shortyear" and "veryshorttitle", and
133 and ":lower" modifier for all fields (in a way easily extended to other modifiers).
134 Helpfile help/LabelPatterns.html updated accordingly.
136 // check whether there is a modifier on the end such as ":lower"
137 //String modifier = null;
// A marker may carry ':'-separated modifiers (e.g. "title:lower"); parts[1..] are applied
// in the modifier loop further down, after the value has been built.
138 String[] parts = val.split(":");
140 //int _mi = val.indexOf(":");
141 //if(_mi != -1 && _mi != val.length()-1 && _mi != 0) { // ":" is in val and isn't first or last character
142 //modifier=val.substring(_mi+1);
143 //val=val.substring(0,_mi);
// _sbvalue collects the expansion of this single marker.
145 StringBuffer _sbvalue = new StringBuffer();
149 if (val.startsWith("auth") || val.startsWith("pureauth")) {
151 // For label code "auth...": if there is no author, but there are editor(s)
152 // (e.g. for an Edited Book), use the editor(s) instead. (saw27@mrao.cam.ac.uk).
153 // This is what most people want, but in case somebody really needs a field which
154 // expands to nothing if there is no author (e.g. someone who uses both "auth"
155 // and "ed" in the same label), we provide an alternative form "pureauth..." which
156 // does not do this fallback substitution of editor.
159 if(val.startsWith("pure")) {
160 // remove the "pure" prefix so the remaining code in this section functions correctly
161 val = val.substring(4);
// NOTE(review): the two System.out.println calls below look like leftover debug output.
162 System.out.println("val is now "+val);
163 authString = _entry.getField("author").toString(); // use even if empty
164 System.out.println("Got authString " + authString);
// Non-"pure" form: use the editor field when author is null or the empty string.
166 if (_entry.getField("author") == null || _entry.getField("author").toString().equals("")) {
167 authString = _entry.getField("editor").toString();
169 authString = _entry.getField("author").toString();
173 // Gather all author-related checks, so we don't have to check all all the time.
174 if (val.equals("auth")) {
175 _sbvalue.append(firstAuthor(authString));
177 else if (val.equals("authors")) {
178 _sbvalue.append(allAuthors(authString));
180 // Last author's last name
181 else if (val.equals("authorLast")) {
182 _sbvalue.append(lastAuthor(authString));
184 else if (val.equals("authorIni")) {
185 _sbvalue.append(oneAuthorPlusIni(authString));
// "authIni" has 7 characters; the remainder of the marker is the number N.
187 else if (val.matches("authIni[\\d]+")) {
188 int num = Integer.parseInt(val.substring(7));
189 _sbvalue.append(authIniN(authString,num));
191 else if (val.equals("auth.auth.ea")) {
192 _sbvalue.append(authAuthEa(authString));
194 else if (val.equals("auth.etal")) {
195 _sbvalue.append(authEtal(authString));
198 else if (val.equals("authshort")) {
199 _sbvalue.append(authshort(authString));
// "authN_M": nums[0] is N (number of characters), nums[1] is M (author position, 1-based in
// the marker); authN_M indexes authors from 0, hence the "- 1".
201 else if (val.matches("auth[\\d]+_[\\d]+")) {
202 String[] nums = val.substring(4).split("_");
203 _sbvalue.append(authN_M(authString,
204 Integer.parseInt(nums[0]),
205 Integer.parseInt(nums[1]) - 1));
207 // authN. First N chars of the first author's last name.
208 else if (val.matches("auth\\d+")) {
209 int num = Integer.parseInt(val.substring(4));
210 String fa = firstAuthor(authString);
// Presumably N is clamped to the name length here (the statement under this `if` is not
// visible in this copy) - otherwise the substring call below would throw for short names.
211 if ( num > fa.length() )
213 _sbvalue.append(fa.substring(0,num));
// "authors" has 7 characters; the remainder is the maximum number of authors.
215 else if (val.matches("authors\\d+")) {
216 _sbvalue.append(NAuthors(authString,Integer.parseInt(val.substring(7))));
220 // This "auth" business was a dead end, so just use it literally:
221 _sbvalue.append(_entry.getField(val).toString());
// Editor markers mirror the author markers above but always read the "editor" field
// (there is no author fallback in this direction).
224 else if (val.startsWith("ed")) {
225 // Gather all markers starting with "ed" here, so we don't have to check all all the time.
226 if (val.equals("edtr")) {
227 _sbvalue.append(firstAuthor(_entry.getField("editor").toString()));
229 else if (val.equals("editors")) {
230 _sbvalue.append(allAuthors(_entry.getField("editor").toString()));
232 else if (val.equals("editorIni")) {
233 _sbvalue.append(oneAuthorPlusIni(_entry.getField("editor").toString()));
235 else if (val.matches("edtrIni[\\d]+")) {
236 int num = Integer.parseInt(val.substring(7));
237 _sbvalue.append(authIniN(_entry.getField("editor").toString(),num));
239 else if (val.matches("edtr[\\d]+_[\\d]+")) {
240 String[] nums = val.substring(4).split("_");
241 _sbvalue.append(authN_M(_entry.getField("editor").toString(), Integer.parseInt(nums[0]),
242 Integer.parseInt(nums[1])-1));
244 else if (val.equals("edtr.edtr.ea")) {
245 _sbvalue.append(authAuthEa(_entry.getField("editor").toString()));
247 else if (val.equals("edtrshort")) {
248 _sbvalue.append(authshort(_entry.getField("editor").toString()));
250 // authN. First N chars of the first author's last name.
251 else if (val.matches("edtr\\d+")) {
252 int num = Integer.parseInt(val.substring(4));
253 String fa = firstAuthor(_entry.getField("editor").toString());
254 if ( num > fa.length() )
256 _sbvalue.append(fa.substring(0,num));
258 // Last editor's last name
// NOTE(review): unlike its siblings this branch casts the field to String instead of calling
// toString(), so a missing editor reaches lastAuthor as null rather than failing here.
259 else if (val.equals("editorLast")) {
260 _sbvalue.append(lastAuthor((String)(_entry.getField("editor"))));
263 // This "ed" business was a dead end, so just use it literally:
264 _sbvalue.append(_entry.getField(val).toString());
267 else if (val.equals("firstpage")) {
268 _sbvalue.append(firstPage(_entry.getField("pages").toString()));
271 else if (val.equals("lastpage")) {
272 _sbvalue.append(lastPage(_entry.getField("pages").toString()));
// "shorttitle" takes up to 3 title words, "veryshorttitle" (below) takes 1.
274 else if (val.equals("shorttitle")) {
275 _sbvalue.append(getTitleWords(3, _entry));
277 else if (val.equals("shortyear")) {
278 String ss = _entry.getField("year").toString();
// Years starting with "in" or "sub" (presumably "in press" / "submitted") become "IP".
279 if (ss.startsWith("in") || ss.startsWith("sub")) {
280 _sbvalue.append("IP");
// Otherwise keep only the last two characters of a year longer than two characters.
282 else if (ss.length() > 2) {
283 _sbvalue.append(ss.substring(ss.length() - 2));
290 else if(val.equals("veryshorttitle")) {
291 _sbvalue.append(getTitleWords(1, _entry));
// "keywordN": the N-th entry of the keywords field, split on ',' or ';'.
294 else if (val.matches("keyword\\d+")) {
295 int num = Integer.parseInt(val.substring(7));
296 String kw = _entry.getField("keywords").toString();
298 String[] keywords = kw.split("[,;]\\s*");
// NOTE(review): keywords[num-1] is also valid for num == keywords.length, but the strict
// '<' rejects it, so the last keyword can never be selected - possible off-by-one; confirm.
299 if ((num > 0) && (num < keywords.length))
300 _sbvalue.append(keywords[num-1].trim());
304 // we havent seen any special demands
306 _sbvalue.append(_entry.getField(val).toString());
// Apparently a missing field shows up as a NullPointerException from getField(...).toString();
// it is swallowed here, so such a marker contributes nothing to the key.
309 catch (NullPointerException ex) {
310 //Globals.logger("Key generator warning: field '" + val + "' empty.");
312 // apply modifier if present
313 if (parts.length > 1) for (int j=1; j<parts.length; j++) {
314 String modifier = parts[j];
316 if(modifier.equals("lower")) {
317 String tmp = _sbvalue.toString().toLowerCase();
318 _sbvalue = new StringBuffer(tmp);
320 else if (modifier.equals("abbr")) {
321 // Abbreviate - that is,
322 //System.out.println(_sbvalue.toString());
// Braces are removed, the value is split on space/CR/LF, and the first character of every
// non-empty word is collected in `abbr`.
323 StringBuffer abbr = new StringBuffer();
324 String[] words = _sbvalue.toString().replaceAll("[\\{\\}]","")
325 .split("[ \r\n]");//split("\\b");
326 for (int word=0; word<words.length; word++)
327 if (words[word].length() > 0)
328 abbr.append(words[word].charAt(0));
332 Globals.logger("Key generator warning: unknown modifier '"+modifier+"'.");
336 _sb.append(_sbvalue);
// Any other exception raised while expanding the pattern is only printed to stderr.
// Presumably the enclosing try wraps the whole token loop, so the key is then built from
// whatever _sb holds at that point - TODO confirm (the try line is not visible here).
346 catch (Exception e) {
347 System.err.println(e);
351 * Edited by Morten Alver 2004.02.04.
353 * We now have a system for easing key duplicate prevention, so
354 * I am changing this method to conform to it.
// NOTE(review): makeLabelUnique is not defined in the visible part of this file; the next
// three lines appear to be the superseded implementation that the edit note above refers to.
357 // here we make sure the key is unique
358 _label = makeLabelUnique(_sb.toString());
359 _entry.setField(Globals.KEY_FIELD, _label);
363 // Remove all illegal characters from the key.
364 _label = Util.checkLegalKey(_sb.toString());
366 // Patch by Toralf Senger:
367 // Remove Regular Expressions while generating Keys
// The replacement is applied only when the "KeyPatternRegex" preference is non-blank.
368 String regex = Globals.prefs.get("KeyPatternRegex");
369 if ((regex != null) && (regex.trim().length() > 0)) {
370 String replacement = Globals.prefs.get("KeyPatternReplacement");
371 _label = _label.replaceAll(regex, replacement);
// NOTE(review): the conditions guarding the two case conversions below are not visible in
// this copy; forceUpper/forceLower are never assigned in the visible lines.
375 _label = _label.toUpperCase();
378 _label = _label.toLowerCase();
// Count how often the candidate key is used in _db; if the entry already carries exactly
// this key, one of those occurrences is the entry itself and is not a clash.
382 String oldKey = _entry.getCiteKey();
383 int occurences = _db.getNumberOfKeyOccurences(_label);
384 if ((oldKey != null) && oldKey.equals(_label))
385 occurences--; // No change, so we can accept one dupe.
387 // Try new keys until we get a unique one:
388 //if (_db.setCiteKeyForEntry(_entry.getId(), _label)) {
390 if (occurences == 0) {
391 // No dupes found, so we can just go ahead.
392 if (!_label.equals(oldKey))
393 _db.setCiteKeyForEntry(_entry.getId(), _label);
398 // The key is already in use, so we must modify it.
// Candidate keys are _label plus getAddition(number); the declaration and increment of
// `number` are not visible in this copy (presumably 0, 1, 2, ...).
401 String moddedKey = _label+getAddition(number);
402 occurences = _db.getNumberOfKeyOccurences(moddedKey);
403 if ((oldKey != null) && oldKey.equals(moddedKey))
405 while (occurences > 0) {
407 moddedKey = _label+getAddition(number);
409 occurences = _db.getNumberOfKeyOccurences(moddedKey);
410 if ((oldKey != null) && oldKey.equals(moddedKey))
// NOTE(review): `c` is not declared in the visible lines; the block from modKey to the
// second while loop looks like an older single-character variant of the loop above.
416 String modKey = _label + "a";
417 occurences = _db.getNumberOfKeyOccurences(modKey);
418 if ((oldKey != null) && oldKey.equals(modKey))
420 //while (_db.setCiteKeyForEntry(_entry.getId(), modKey)) {
421 while (occurences > 0) {
422 modKey = _label + ( (char) (c++));
424 occurences = _db.getNumberOfKeyOccurences(modKey);
425 if ((oldKey != null) && oldKey.equals(modKey))
// Write the key back only if it differs from the key the entry already has.
430 if (!moddedKey.equals(oldKey)) {
431 _db.setCiteKeyForEntry(_entry.getId(), moddedKey);
436 /** End of edit, Morten Alver 2004.02.04. */
441 * Computes an appendix to a BibTeX key that could make it unique. We use a-z for numbers
442 * 0-25, and then aa-az, ba-bz, etc.
443 * @param number The appendix number.
444 * @return The String to append.
446 private static String getAddition(int number) {
448 if (number >= CHARS.length()) {
449 int lastChar = number % CHARS.length();
450 return getAddition(number/CHARS.length()-1) + CHARS.substring(lastChar, lastChar+1);
452 return CHARS.substring(number, number+1);
456 static String getTitleWords(int number, BibtexEntry _entry) {
457 String ss = (new RemoveLatexCommands()).format(_entry.getField("title").toString());
458 StringBuffer _sbvalue = new StringBuffer(),
460 int piv=0, words = 0;
462 // sorry for being English-centric. I guess these
463 // words should really be an editable preference.
464 mainl: while ((piv < ss.length()) && (words < number)) {
465 current = new StringBuffer();
466 // Get the next word:
467 while ((piv<ss.length()) && !Character.isWhitespace(ss.charAt(piv))) {
468 current.append(ss.charAt(piv));
470 //System.out.println(".. "+piv+" '"+current.toString()+"'");
473 // Check if it is ok:
474 String word = current.toString().trim();
475 if (word.length() == 0)
477 for(int _i=0; _i< Globals.SKIP_WORDS.length; _i++) {
478 if (word.equalsIgnoreCase(Globals.SKIP_WORDS[_i])) {
483 // If we get here, the word was accepted.
484 if (_sbvalue.length() > 0)
485 _sbvalue.append(" ");
486 _sbvalue.append(word);
490 return _sbvalue.toString();
495 * Tests whether a given label is unique.
496 * @param label a <code>String</code>
497 * @return <code>true</code> if and only if the <code>label</code> is unique
499 public static boolean isLabelUnique(String label) {
500 boolean _isUnique = true;
502 int _dbSize = _db.getEntryCount();
503 // run through the whole DB and check the key field
504 // if this could be made recursive I would be very happy
505 // it kinda sux that we have to run through the whole db.
506 // The idea here is that if we meet NO match, the _duplicate
507 // field will be true
509 for (int i = 0; i < _dbSize; i++) {
510 _entry = _db.getEntryById(String.valueOf(i));
512 // oh my! there is a match! we better set the uniqueness to false
513 // and leave this for-loop all together
514 if (_entry.getField(BibtexFields.KEY_FIELD).equals(label)) {
525 * Gets the last name of the first author/editor
526 * @param authorField a <code>String</code>
527 * @return the sur name of an author/editor
529 private static String firstAuthor(String authorField) {
530 String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\band\\b");
531 if (tokens.length > 0) { // if author is empty
532 String[] firstAuthor = tokens[0].replaceAll("\\s+", " ").split(" ");
533 return firstAuthor[0];
540 * Gets the last name of the last author/editor
541 * @param authorField a <code>String</code>
542 * @return the sur name of an author/editor
544 private static String lastAuthor(String authorField) {
545 String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\band\\b");
546 if (tokens.length > 0) { // if author is empty
547 String[] lastAuthor = tokens[tokens.length-1].replaceAll("\\s+", " ").trim().split(" ");
548 return lastAuthor[0];
556 * Gets the last name of all authors/editors
557 * @param authorField a <code>String</code>
558 * @return the sur name of all authors/editors
560 private static String allAuthors(String authorField) {
562 // This code was part of 'ApplyRule' in 'ArticleLabelRule'
563 String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\band\\b");
565 while (tokens.length > i) {
566 // convert lastname, firstname to firstname lastname
567 String[] firstAuthor = tokens[i].replaceAll("\\s+", " ").trim().split(" ");
568 // lastname, firstname
569 author += firstAuthor[0];
576 * Gets the surnames of the first N authors and appends EtAl if there are more than N authors
577 * @param authorField a <code>String</code>
578 * @param n the number of desired authors
579 * @return Gets the surnames of the first N authors and appends EtAl if there are more than N authors
581 private static String NAuthors(String authorField, int n) {
583 // This code was part of 'ApplyRule' in 'ArticleLabelRule'
584 String[] tokens = AuthorList.fixAuthorForAlphabetization(authorField).split("\\band\\b");
586 while (tokens.length > i && i < n) {
587 // convert lastname, firstname to firstname lastname
588 String[] firstAuthor = tokens[i].replaceAll("\\s+", " ").trim().split(" ");
589 // lastname, firstname
590 author += firstAuthor[0];
593 if (tokens.length <= n) return author;
594 return author += "EtAl";
598 * Gets the first part of the last name of the first
599 * author/editor, and appends the last name initial of the
600 * remaining authors/editors.
601 * @param authorField a <code>String</code>
602 * @return the sur name of all authors/editors
604 private static String oneAuthorPlusIni(String authorField) {
605 final int CHARS_OF_FIRST = 5;
606 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
608 // This code was part of 'ApplyRule' in 'ArticleLabelRule'
609 String[] tokens = authorField.split("\\band\\b");
611 if (tokens.length == 0) {
614 String[] firstAuthor = tokens[0].replaceAll("\\s+", " ").split(" ");
615 author = firstAuthor[0].substring(0,
616 (int) Math.min(CHARS_OF_FIRST,
617 firstAuthor[0].length()));
618 while (tokens.length > i) {
619 // convert lastname, firstname to firstname lastname
620 author += tokens[i].trim().charAt(0);
628 * auth.auth.ea format:
629 * Isaac Newton and James Maxwell and Albert Einstein (1960)
630 * Isaac Newton and James Maxwell (1960)
635 private static String authAuthEa(String authorField) {
636 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
637 StringBuffer author = new StringBuffer();
639 String[] tokens = authorField.split("\\band\\b");
640 if (tokens.length == 0) {
643 author.append((tokens[0].split(","))[0]);
644 if (tokens.length >= 2)
645 author.append(".").append((tokens[1].split(","))[0]);
646 if (tokens.length > 2)
647 author.append(".ea");
649 return author.toString();
654 * Isaac Newton and James Maxwell and Albert Einstein (1960)
655 * Isaac Newton and James Maxwell (1960)
660 private static String authEtal(String authorField) {
661 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
662 StringBuffer author = new StringBuffer();
664 String[] tokens = authorField.split("\\band\\b");
665 if (tokens.length == 0) {
668 author.append((tokens[0].split(","))[0]);
669 if (tokens.length == 2)
670 author.append(".").append((tokens[1].split(","))[0]);
671 else if (tokens.length > 2)
672 author.append(".etal");
674 return author.toString();
678 * The first N characters of the Mth author/editor.
680 private static String authN_M(String authorField, int n, int m) {
681 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
682 StringBuffer author = new StringBuffer();
684 String[] tokens = authorField.split("\\band\\b");
685 if ((tokens.length <= m) || (n<0) || (m<0)) {
688 String lastName = (tokens[m].split(","))[0].trim();
689 //System.out.println(lastName);
690 if (lastName.length() <= n)
693 return lastName.substring(0, n);
698 * added by Kolja Brix, kbx@users.sourceforge.net
701 * Isaac Newton and James Maxwell and Albert Einstein and N. Bohr
702 * Isaac Newton and James Maxwell and Albert Einstein
703 * Isaac Newton and James Maxwell
711 private static String authshort(String authorField) {
712 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
713 StringBuffer author = new StringBuffer();
714 String[] tokens = authorField.split("\\band\\b");
717 if (tokens.length == 1) {
719 author.append(authN_M(authorField,authorField.length(),0));
721 } else if (tokens.length >= 2) {
723 while (tokens.length > i && i<3) {
724 author.append(authN_M(authorField,1,i));
728 if (tokens.length > 3)
733 return author.toString();
738 * Each author gets (N div #authors) chars, the remaining
739 * (N mod #authors) chars are equally distributed to the
740 * authors first in the row.
741 * If (N < #authors), only the fist N authors get mentioned.
742 * a) I. Newton and J. Maxwell and A. Einstein and N. Bohr (..)
743 * b) I. Newton and J. Maxwell and A. Einstein
744 * c) I. Newton and J. Maxwell
746 * E.g. authIni4 gives: a) NMEB, b) NeME, c) NeMa, d) Newt
748 private static String authIniN(String authorField, int n) {
749 authorField = AuthorList.fixAuthorForAlphabetization(authorField);
750 StringBuffer author = new StringBuffer();
751 String[] tokens = authorField.split("\\band\\b");
753 int charsAll = n / tokens.length;
755 if (tokens.length == 0) {
756 return author.toString();
759 while (tokens.length > i) {
760 if ( i < (n % tokens.length) ) {
761 author.append(authN_M(authorField,charsAll+1,i));
763 author.append(authN_M(authorField,charsAll,i));
768 if (author.length() <= n)
769 return author.toString();
771 return author.toString().substring(0, n);
776 * Split the pages field into two and return the first one
777 * @param pages a <code>String</code>
778 * @return the first page number
780 private static String firstPage(String pages) {
781 String[] _pages = pages.split("-");
786 * Split the pages field into two and return the last one
787 * @param pages a <code>String</code>
788 * @return the last page number
790 private static String lastPage(String pages) {
791 String[] _pages = pages.split("-");