BibDesk and List of Publications

BibDesk is a fantastic tool to organize your library of scientific papers. It can also be used to organize your own publications. On my webpage I prefer to list my publications in a special order, namely organized by the categories journal papers and book chapters, conference papers, and everything else. Ideally, each record would also link to an attached PDF file and all I would have to do is keep my BibDesk database organized and my online list of publications would be kept up-to-date automatically.
Unfortunately, generating publication listings that are organized by category and link to attached PDF files doesn’t seem to be straightforward with BibDesk’s own export mechanism. So here is what I came up with myself: a Perl script that I can call from within emacs to automatically insert the list of publications where I need it.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#!/usr/bin/perl -w
# Copyright (C) 2009 by Bjoern Rueffer, Time-stamp: <2009-11-05 00:59:33 bjoern>

# This program TAKES A BIBDESK FILE (essentially a bibtex file) AND
# GENERATES A HTML PUBLICATION LIST FROM IT. This list is formatted
# using CSS tags and its intended use is to be inserted into my
# homepage (a single html file as of the time of this writing). This
# program can be called from within emacs and the actual insertion
# can be performed automatically, e.g., by using a small emacs lisp
# function embedded into the html-file. Something like this:
#       <!-- 
#       (let ((beg (progn (search-forward "startinsert")
#                         (forward-line 1)
#                         (point))))
#       (search-forward "/endinsert")
#       (beginning-of-line)
#       (delete-region beg (point))
#       (shell-command "~/path/to/this/file.pl" 1 "perl output to STDERR")
#       )   type C-x C-e after the closing brace to update publication listing
#       -->
#       <!--startinsert-->
#       <!--/endinsert-->
#
# Admittedly, this might not be the most elegant way to do it, but it
# is pretty effective.

# This program REQUIRES Text::BibTeX version >= 0.34 from
# http://starship.python.net/~gward/btOOL/ for reading the bibtex file
# and formatting author names etc. A very handy tool!

# All CONFIGURATION is currently hard-coded into this file; A bibtex
# file "$bibdeskfile" is read and the generated HTML output is pasted
# into STDOUT. File attachments (via the local-url-fields and assumed
# to be PDFs) will be copied to a directory
# "$attachmentdirabsolute". During the process, lists of authors will
# be condensed, by removing any author whose name matches /R.*ffer/ --
# you might want to adapt that for your purposes.

# Use this software for whatever you want at your own risk, but don't
# blame me for anything. If you want to report any improvements you've
# made back to me, please do so to the email address to be found at
# http://bjoern.rueffer.info. I'd appreciate that!

use warnings; 
use strict;
use Text::BibTeX qw(:nameparts :joinmethods);
use Text::BibTeX::Name;
use Text::BibTeX::NameFormat;

### setup
# All configuration is hard-coded in this section: where attached PDFs
# are linked from and copied to, which BibDesk file is read, and how
# author names are rendered.
my $attachmentdirrelative = "attachments"; # relative directory name: where to link to for attached files
my $attachmentdirabsolute = "/path/to/Homepage/attachments"; # absolute directory name: where to put attached files

# Create the attachment directory on first use. A failure is only
# reported (not fatal): the HTML list can still be generated, the PDF
# copies will simply fail later on.
unless (-d $attachmentdirabsolute) {
  mkdir $attachmentdirabsolute
    or warn "Cannot create attachment directory $attachmentdirabsolute: $!\n";
}

# Remove the PDFs left over from a previous run. This is done with
# opendir/unlink instead of a shell "rm -f dir/*.pdf" so that directory
# names containing spaces or shell metacharacters are handled correctly.
# Like the shell glob, hidden files (leading dot) are left alone.
if (opendir(my $attachmentdir, $attachmentdirabsolute)) {
  foreach my $leftover (grep { !/^\./ && /\.pdf\z/ } readdir $attachmentdir) {
    unlink "$attachmentdirabsolute/$leftover"
      or warn "Cannot remove $attachmentdirabsolute/$leftover: $!\n";
  }
  closedir $attachmentdir;
} else {
  warn "Cannot read attachment directory $attachmentdirabsolute: $!\n";
}

# which bibdesk file to load? Without it there is nothing to do, so
# failing to open it is fatal.
my $bibdeskfile = Text::BibTeX::File->new("</path/to/Publications/mypublications.bib")
  or die "Cannot open the BibDesk file: $!\n";

my $nameformat = Text::BibTeX::NameFormat->new("fvlj", 1); # name formatting rules, see "man Text::BibTeX::Name"

# Strip or translate the few TeX constructs that occur in the
# bibliography so that the text can be embedded into HTML.
#
# Argument: one string (may be undef).
# Returns:  the cleaned string; the empty string for an undef argument.
#
# The caller's $_ is left untouched: the work happens on a private copy.
# Note that ALL curly braces and dollar signs are removed, so this must
# not be applied to text that needs to keep them.
sub texcleanedstring {
  my ($text) = @_;
  return "" unless defined $text;

  $text =~ s/\\verb\|(.+?)\|/$1/g;        # remove \verb|...| (non-greedy: several \verb's per string are handled one by one)
  $text =~ s/\\em //g;                    # remove formatting
  $text =~ s/\\ |~/ /g;                   # "\ " and "~" become ordinary spaces
  $text =~ s/\{\\"([auoAUO])\}/&$1uml;/g; # aou umlauts into HTML
  $text =~ s/\{|\}//g;                    # remove curly braces
  $text =~ s/\$//g;                       # remove dollar signs
  $text =~ s/--/&#8211;/g;                # correct en dashes
  return $text;
}

# Reformat a BibTeX author field into a co-author note of the form
# "(with X)", "(with X and Y)" or "(with X, Y and Z)".
#
# Argument: the raw author field ("A and B and C"); may be undef.
# Returns:  the note, or the empty string when nobody is left after
#           removing the list owner (any name matching /R.*ffer/i).
#
# The co-authors are filtered first and joined afterwards, so the
# separators come out right no matter whether (or how often) the list
# owner appears among the authors.
sub formatauthorstring {
  my ($authorfield) = @_;
  return "" unless defined $authorfield;

  my @coauthors =
    map  { Text::BibTeX::Name->new($_)->format($nameformat) } # this takes care of the name formatting
    grep { !/R.*ffer/i }                                      # don't mention yourself
    split /\s+and\s+/, $authorfield;
  return "" unless @coauthors;

  # "X", "X and Y", "X, Y and Z", ...
  my $lastcoauthor = pop @coauthors;
  my $list = @coauthors
    ? join(", ", @coauthors) . " and " . $lastcoauthor
    : $lastcoauthor;
  return "(with " . $list . ")";
}

# Reformat a BibTeX editor field into "X", "X and Y" or "X, Y and Z".
# Similar to formatauthorstring, but without the "(with ...)" wrapper.
#
# Argument: the raw editor field ("A and B and C"); may be undef.
# Returns:  the formatted list, or the empty string when nobody is left
#           after removing the list owner (any name matching /R.*ffer/i).
#
# Names are filtered first and joined afterwards, so no dangling
# ", " or " and " is produced when the list owner is one of the editors.
sub formateditorstring {
  my ($editorfield) = @_;
  return "" unless defined $editorfield;

  my @editors =
    map  { Text::BibTeX::Name->new($_)->format($nameformat) } # this takes care of the name formatting
    grep { !/R.*ffer/i }                                      # don't mention yourself
    split /\s+and\s+/, $editorfield;
  return "" unless @editors;

  my $lasteditor = pop @editors;
  return @editors
    ? join(", ", @editors) . " and " . $lasteditor
    : $lasteditor;
}

# Formatted HTML records, keyed by a sort tag (year, cleaned title and a
# running number that keeps the key unique).
my %jpapers = ();               # journal papers and book chapters
my %cpapers = ();               # conference papers
my %miscpapers = ();            # theses and reports

my $publicationcounter = 0;     # number of entries accepted so far; names the copied PDFs and is used for reverse numbering in the html output

require File::Copy;             # core module; copies attachments without going through a shell

# Main pass over the bibtex file: every accepted publication record is
# turned into an HTML fragment and filed into one of the three hashes
# above. To some degree this is BibTeX's job, but by doing it "manually"
# css classes can be inserted so that the looks can be changed later on.
while (my $entry = Text::BibTeX::Entry->new($bibdeskfile)) { # for each publication record in the bibtex file...
  next unless $entry->parse_ok; # yeah, we might want to not consider everything, like @string{} and similar entries
  my $type = $entry->type;
  next unless $type =~ /article|inproceedings|incollection|thesis|report/; # or @unpublished entries for that matter

  # Records are sorted by year, so an entry without one cannot be
  # listed. It is rejected before it is counted and before its PDF is
  # copied, which keeps the numbering of the final list consistent.
  # This may actually happen despite good intentions, e.g., if there is
  # a hard-coded html hyperlink in one of the special tags (the
  # "online-information" key is a candidate for that).
  unless ($entry->exists('year')) {
    my $bibkey = $entry->key;
    $bibkey = "(unknown key)" unless defined $bibkey;
    warn "ERROR: Found an entry without a year. You'd want to correct that in the bibtex file! Meanwhile I'll ignore that entry.\nThe entry in question is: $bibkey\n\n";
    next;
  }
  $publicationcounter += 1;

  # PREFIX . field value . SUFFIX if the entry has FIELD, "" otherwise
  my $wrapped = sub {
    my ($field, $prefix, $suffix) = @_;
    return "" unless $entry->exists($field);
    return $prefix . $entry->get($field) . $suffix;
  };

  # pieces shared by several publication types
  my $title     = $wrapped->('title', "<span class=\"pubtitle\">", ".</span>\n  ");
  my $coauthors = $entry->exists('author') ? formatauthorstring($entry->get('author')) . " " : "";
  my $yearline  = $entry->get('year') . "\n";

  my $html = "";                # the record being assembled (before TeX cleanup)

  # Some publication entries have a special key "publish-pdf" to
  # indicate whether an attached file should be made public or not
  # (this is a boolean field in BibDesk). All attached files are
  # assumed to be of PDF type; "local-url" is expected to hold the path
  # of (one of) the attached files. Theses and reports never get a PDF.
  if ($entry->exists('publish-pdf') && $entry->exists('local-url')
      && $type !~ /thesis|report/
      && $entry->get('publish-pdf') =~ m/yes|true|1/i) {
    # create a copy of the attached (PDF!) file with a simplified filename
    my $source = $entry->get('local-url');
    my $target = "$attachmentdirabsolute/$publicationcounter.pdf";
    if (File::Copy::copy($source, $target)) {
      $html .= "<div class=\"pdf\"><a href=\"$attachmentdirrelative/$publicationcounter.pdf\" class=\"pdf\"><img class=\"pdf\" src=\"Oficina-PDF-128x128.png\"></a></div>\n";
    } else {
      # no link is emitted for a file that is not there
      warn "Could not copy \"$source\" to \"$target\": $!\n";
    }
  }

  # The next few if-clauses take care of the formatting of each
  # individual publication record, as it appears on the online
  # publication listing. Obviously journal and conference papers have
  # to be treated differently.

  if ($type =~ /thesis/) {
    $html .= $title . "<br />";
    if ($type =~ /mastersthesis/) {
      $html .= "Masters thesis, ";
    } elsif ($type =~ /phdthesis/) {
      $html .= "PhD thesis, ";
    }
    $html .= $wrapped->('school', "<i>", "</i>, ")
           . $wrapped->('month', "", ",\n  ")
           . $yearline;
  }

  if ($type =~ /report/) {
    $html .= $title . $coauthors . "<br />Technical report, "
           . $wrapped->('institution', "<i>", "</i>, ")
           . $wrapped->('month', "", ",\n  ")
           . $yearline;
  }

  if ($type eq 'article') {
    $html .= $title . $coauthors
           . $wrapped->('journal', "<br /><i>", "</i>")
           . ($entry->exists('volume') ? " <b>" . $entry->get('volume') . "</b>" : ", ")
           . $wrapped->('number', "(", ")")
           . $wrapped->('pages', ":", ",\n  ")
           . $yearline;
  }

  if ($type eq 'incollection') {
    $html .= $title . $coauthors . "<br />In: "
           . ($entry->exists('editor')
              ? "<i>" . formateditorstring($entry->get('editor')) . " (Eds.)</i>:\n  "
              : "")
           . $wrapped->('booktitle', "<i>", "</i>,\n  ")
           . $wrapped->('pages', "pp. ", ",\n  ")
           . $wrapped->('publisher', "", ", ")
           . $wrapped->('address', "", ", ")
           . $yearline;
  }

  if ($type eq 'inproceedings') {
    $html .= $title . $coauthors
           . $wrapped->('booktitle', "<br />In: <i>", "</i>,\n  ")
           . $wrapped->('address', "", ",\n  ")
           . $wrapped->('pages', "pp. ", ",\n  ")
           . $wrapped->('month', "", ",\n  ")
           . $yearline;
  }

  if ($entry->exists('note')) { # the note field contains "to appear", "submitted June 2009" etc. it might also contain a DOI as text
    my $bibnote = $entry->get('note');
    $bibnote =~ s{(https?://[-a-zA-Z/.:0-9]+)}{<a href="$1">[external resource]</a>}g; # activate hyperlinks (http and https)
    $bibnote =~ s{DOI:([\w\d./-]+)}{\n<span class="pubDOI">DOI:<a class="pubDOI" href="http://dx.doi.org/$1">$1</a></span>\n}; # activate silent DOIs
    $html .= "<span class=\"pubNOTE\">" . $bibnote . "</span>\n";
  }

  # TeX cleanup is applied to everything assembled so far; the doi and
  # online-information fields below are appended verbatim.
  my $publicationentry = texcleanedstring($html);

  if ($entry->exists('doi')) { # some publications do have a doi-field, and this is how it gets formatted
    my $doi = $entry->get('doi');
    $publicationentry .= "\n<span class=\"pubDOI\">DOI:<a class=\"pubDOI\" href=\"http://dx.doi.org/$doi\">$doi</a></span>\n";
  }

  if ($entry->exists('online-information')) { # need that for Springer/Positivity disclaimer "The original publication is available at www.springerlink.com."
    $publicationentry .= "\n<br /><span class=\"pubONLINE-INFORMATION\">" . $entry->get('online-information') . "</span>\n";
  }

  # Sort tag: year, then cleaned title. The trailing "\0" plus running
  # number makes the key unique, so two records with the same year and
  # title cannot overwrite each other; "\0" sorts before every other
  # character and therefore leaves the year/title order unchanged.
  my $tag = $entry->get('year')
          . ($entry->exists('title') ? texcleanedstring($entry->get('title')) : "")
          . "\0" . sprintf("%06d", $publicationcounter);

  # finally decide to which category the publication entry we have
  # been working on belongs to and file it
  if ($type =~ /article|incollection/) {
    $jpapers{$tag} = $publicationentry;
  } elsif ($type eq 'inproceedings') {
    $cpapers{$tag} = $publicationentry;
  } elsif ($type =~ /thesis|report/) {
    $miscpapers{$tag} = $publicationentry;
  }
}

# Status report; goes to STDERR so that it does not end up in the HTML.
print STDERR "Read $publicationcounter publication entries from BibTeX file.\n";

# What follows is not a complete and valid html file, just a snippet to
# paste into something bigger. A full page could be produced by
# additionally printing a header like this:

# print "<html><head><title>My list of publications</title></head><body>\n";

# The three categories in the order they appear on the page: heading
# text and the hash holding the formatted records of that category.
my @sections = (
  [ "<h3>Journal papers and book chapters</h3>\n\n", \%jpapers ],
  [ "\n\n<h3>Conference articles</h3>\n\n",          \%cpapers ],
  [ "\n\n<h3>Theses and reports</h3>\n\n",           \%miscpapers ],
);

my @orderedkeys;
for my $section (@sections) {
  my ($heading, $papers) = @{$section};
  print $heading;

  # descending string order of the keys, i.e. newest year first
  @orderedkeys = sort { $b cmp $a } keys %{$papers};
  for my $key (@orderedkeys) {
    # the counter keeps running across categories, numbering downwards
    my $number = $publicationcounter;
    $publicationcounter--;
    print "<div class=\"pub\"><div class=\"pubCOUNTER\">[$number]</div>\n" . $papers->{$key} . "</div>\n";
  }
}

# ... and a matching footer (not needed for a snippet):
# print "</body></html>\n";

$bibdeskfile->close;            # explicit close; Text::BibTeX may well do this on its own