#!/usr/local/bin/perl
#
# bibtv : view a BibTeX database and interactively search through it
# Copyright 1992, Dana Jacobsen (jacobsd@cs.orst.edu)
# This used to be called bibview before the X program came out.
#
# This program will take advantage of a feature added in 4.020, so that
# version is recommended though not required.
#
#version = "0.1.0"; # 25 Aug 92  jacobsd  Wrote original version
#version = "0.2.0"; # 25 Aug 92  jacobsd  added options
#version = "0.2.1"; # 26 Aug 92  jacobsd  added options
#version = "0.2.2"; # 26 Aug 92  jacobsd  fast status, command changes
#version = "0.2.3"; # 26 Aug 92  jacobsd  added load all in
#version = "0.2.4"; #  8 Sep 92  jacobsd  completely changed subject loading
#version = "0.2.5"; #  8 Sep 92  jacobsd  added display ranges, lb, others
#version = "0.2.6"; # 30 Oct 92  jacobsd  added write
$version = "0.2.7"; # 11 Dec 92  jacobsd  changed autoshow
#
# todo:
#    things in help that are not yet implemented
#    understand multiple bibliographies
#    don't throw away dup citekeys -- match dups, then pick different key
#    use time to correct secondsperentry instead of making it static
#
# All bug-fixes, suggestions, flames, and compliments gladly accepted.
#
# debugging:  Use "debug on|off|n", where 0 <= n <= 4
#
#  debug 0|off : no debug information.  default.
#  debug 1|on  : basic debug info, just little things, must not get in the way.
#  debug 2     : some detail added.  Should be 1 line or less per record.
#  debug 3     : detailed.
#  debug 4     : spew to your heart's content.  1 record may generate 10 lines..

$ignorewords = "of and the in a on to for from an with by at as its";
$displaymode = 'brief';
$autoshow    = 0;
$sortoutput  = 0;
$showfileno  = 1;
@result      = ();
@allfiles    = ();

# Command line: -deb... turns debugging on, -wor... echoes every indexed
# title word, "--" ends option processing; anything else is a file to load.
while (@ARGV) {
    $_ = shift @ARGV;
    /^--$/  && do { push(@files, @ARGV); undef @ARGV; next; };
    /^-deb/ && do { $debugging = 1; next; };
    /^-wor/ && do { $worddebug = 1; next; };
    push(@files, $_);
}

$bibliodir   = ($ENV{'BIBTVDIR'}) ? $ENV{'BIBTVDIR'}
                                  : '/files/home/solar/jacobsd/crystal/bibs/prod';
$subjectfile = ($ENV{'BIBTVSUB'}) ? $ENV{'BIBTVSUB'}
                                  : $bibliodir . '/SUBJECTS';
# see the routine loadsubjectfile for a description of the format

$secondsperentry = 0.05;     # how many seconds to read an entry?

# globals:
#
#   @allfiles          : a list of all the files loaded
#
#   $records{$citekey} : the full entry, verbatim
#   $fileno{$citekey}  : the file # this came from (use @allfiles)
#   $authors{$citekey} : the author field
#   $titles{$citekey}  : the title field
#   $inauthors{$name}  : a $; separated list of citekeys
#   $intitles{$word}   : a $; separated list of citekeys
#
#   @result            : a numbered array with citekeys of last result

print "bibtv $version by Dana Jacobsen, 1992 PRE-ALPHA RELEASE!!\n";

foreach $infile (@files) {
    &readbibfile($infile);
}

# Interactive command loop.  Each command line is copied into $_ and
# dispatched on its leading keyword.
$timetoquit = 0;
until ($timetoquit) {
    print "> ";
    $command = <STDIN>;
    if (!defined($command)) {
        # End of input (e.g. ^D or a finished pipe): quit instead of
        # re-prompting forever on an undefined command.
        print "\n";
        last;
    }
    $command =~ s/\r?\n$//;
    $_ = $command;

    if (/^load /i) {
        local($dummy, $infile, $subject);
        # .*\S keeps trailing blanks out of the subject name
        if (($subject) = /load\s+all\s+in\s+(.*\S)/i) {
            $debugging && print "Subject: $subject\n";
            &readsubject($subject);
        } else {
            ($dummy, $infile) = split;
            &readbibfile($infile);
        }
    } elsif (/^write /i) {
        local($dummy, $outfile);
        ($dummy, $outfile) = split;
        &writebibfile($outfile);
    } elsif (s/^lb\///i) {
        &handlelookbib($_);
        &reportfound();
    } elsif (/^\w+\s*=/i) {
        &handlesearch($command);
        &reportfound();
    } elsif (s/^and\s+(\w+\s*=)/$1/i) {
        # intersection of the previous result with the new search
        local(%inprev, $k);
        local(@dresult) = @result;
        &handlesearch($_);
        foreach $k (@dresult) { $inprev{$k}++; }
        @result = grep($inprev{$_}, @result);
        &reportfound();
    } elsif (s/^or\s+(\w+\s*=)/$1/i) {
        # union: previous result followed by the new hits not already in it
        local(@iresult, %inprev, $k);
        local(@dresult) = @result;
        &handlesearch($_);
        foreach $k (@dresult) { $inprev{$k}++; }
        @iresult = grep(!$inprev{$_}, @result);
        @result = (@dresult, @iresult);
        &reportfound();
    } elsif (($var) = /^set\s+display\s+(brief|full|bibtex|detail)/i) {
        $var =~ tr/A-Z/a-z/;       # the match is case-insensitive, so is the test
        $displaymode = ($var eq 'brief') ? 'brief' : 'full';
        print "display mode set to $displaymode.\n";
    } elsif (($var) = /^set\s+autoshow\s+(true|false|\d+)/i) {
        $var =~ tr/A-Z/a-z/;
        # results are shown when $#result < $autoshow, so -2 never shows
        $autoshow = ($var eq 'true') ? 100000 : (($var eq 'false') ? -2 : $var);
        print "autoshow set to $var.\n";
    } elsif (($var) = /^set\s+showname\s+(true|false)/i) {
        $var =~ tr/A-Z/a-z/;
        $showfileno = ($var eq 'true') ? 1 : 0;
        print "showname set to $var.\n";
    } elsif (($var) = /^set\s+sort\s+(true|false)/i) {
        $var =~ tr/A-Z/a-z/;
        $sortoutput = ($var eq 'true') ? 1 : 0;
        print "output sorting set to $var.\n";
    } elsif (/^sort/i) {
        @result = &titlesort(@result);
    } elsif (/^list files/i) {
        print join("\n", @allfiles), "\n";
    } elsif (/^list subjects/i) {
        local($sub);
        # loadsubjectfile returns non-zero when the subject file can't be opened
        if (!&loadsubjectfile()) {
            printf " %-20s %16s\n", "Subject", "Number of records";
            printf " %-20.20s %16.16s\n",
                   "------------------------------", "------------------";
            foreach $sub (sort keys %subjlist) {
                printf " %-20s %6d\n", $sub, $subjsize{$sub};
            }
        }
    } elsif (/^status/i) {
        local($n);
        $n = scalar(@result);
        print(($n == 1) ? "1 record" : (($n == 0) ? "no" : $n) . " records",
              " in last search result.\n");
        $n = scalar(@allfiles);
        print(($n == 1) ? "1 file" : (($n == 0) ? "no" : $n) . " files",
              " loaded.\n");
        if ($] > 4.019) {
            print scalar(keys(%records)), " records loaded.\n";
            print scalar(keys(%inauthors)), " unique author last names.\n";
            print scalar(keys(%intitles)), " unique words in titles.\n";
        } else {
            # older perls: count the keys by walking each table
            $count = 0;  $count++ while each %records;
            print $count, " records loaded.\n";
            $count = 0;  $count++ while each %inauthors;
            print $count, " unique author last names.\n";
            $count = 0;  $count++ while each %intitles;
            print $count, " unique words in titles.\n";
        }
        print "display mode is $displaymode.\n";
        # "set autoshow false" stores -2; report that as false, not "-2"
        print "autoshow is ", ($autoshow > 0) ? "$autoshow" : "false", ".\n";
        print "showname is ", $showfileno ? "true" : "false", ".\n";
        print "automatic output sorting is ",
              $sortoutput ? "true" : "false", ".\n";
    } elsif (/^select\s+all/i) {
        @result = sort keys(%records);
    } elsif (/^display\s+authors/i) {
        local(@dresult) = sort keys(%inauthors);
        print join("\n", @dresult), "\n";
    } elsif (/^display\s+titles/i) {
        local(@dresult) = sort keys(%intitles);
        print join("\n", @dresult), "\n";
    } elsif (/^find\s+duplicates/i) {
        &finddups();
    } elsif (/^debug\s+(on|off|\d+)/i) {
        $var = $1;
        $var =~ tr/A-Z/a-z/;
        if    ($var eq 'off') { $debugging = 0; }
        elsif ($var eq 'on')  { $debugging = 1; }
        else                  { $debugging = $var; }
    } elsif (/^(detail|brief|show)/i) {
        # The display mode is switched only for the duration of this command.
        $oldmode = $displaymode;
        if    (s/^detail\s*//i) { $displaymode = 'full'; }
        elsif (s/^brief\s*//i)  { $displaymode = 'brief'; }
        else                    { s/^show\s*//i; }
        /^$/ && ($_ = "all");
        # expand every "m-n" range into the list of numbers it covers
        while (/\b(\d+)-(\d+)\b/) {
            local($lo, $hi) = ($1, $2);
            local(@arange) = $lo .. $hi;
            $debugging && print "turned $_ into ";
            s/\b$lo-$hi\b/join(" ", @arange)/eg;
            $debugging && print "$_\n";
        }
        foreach $num (split) {
            $debugging > 1 && print "entry $num\n";
            if ($num =~ /^all$/i) {
                &printrecs(0, @result);
            } elsif ($num =~ /^\d+$/ && $num >= 1 && $num <= @result) {
                &printrecs($num-1, $result[$num-1]);
            } else {
                # a non-numeric or out-of-range index would otherwise
                # silently select $result[-1]
                print "No such entry: $num\n";
            }
        }
        $displaymode = $oldmode;
    } elsif (/^quit/i) {
        $timetoquit = 1;
    } elsif (/^(\?|help)/i) {
        print "load <file>                  -- load <file>\n";
        print "load all in <subject>        -- load all files in biblio directory\n";
        print "a=<name>                     -- find author exactly matching <name>\n";
        print "a=/<regex>                   -- find author matching the regex <regex>\n";
        print "t=<word>                     -- find title containing the word <word>\n";
        print "t=/<regex>                   -- find title containing the regex <regex>\n";
        print "<field> = /<regex>           -- find field containing the regex <regex>\n";
        print "lb/<text>                    -- find any record containing text\n";
        print "and <search>                 -- logical and the results with <search>\n";
        print "or <search>                  -- logical or the results with <search>\n";
        print "set display brief            -- use one line per record when displaying\n";
        print "set display full             -- use BibTeX record when displaying\n";
        print "set autoshow [true|false|##] -- display results after search\n";
        print "set showname [true|false]    -- display file names in detail display\n";
        print "find duplicates              -- selects duplicate entries\n";
        print "select all                   -- select all records loaded\n";
        print "list files                   -- list files loaded\n";
        print "list subjects                -- list subjects available for load all\n";
        print "show [all | <n> ..]          -- show <n>th result(s) or all\n";
        print "brief [all | <n> ..]         -- display result(s) in brief format\n";
        print "detail [all | <n> ..]        -- display result in BibTeX format\n";
        print "write <file>                 -- write search results to <file>\n";
        print "status                       -- show status of bibtv\n";
        print "quit                         -- quit bibtv\n";
        print "! <command>                  -- execute <command> via the shell\n";
        print "-----------------------\n";
        print "not implemented yet:\n";
        print "unload <file>                -- remove entries from <file>\n";
        print "unload all in <subject>      -- remove entries from biblios in <subject>\n";
        print "and not <search>             -- logical NAND the results with <search>\n";
        print "display mode lib             -- display records in tagged field format\n";
        print "delete [all | <n> ..]        -- delete results from memory\n";
    } elsif (/^\s*$/) {
        # nothing
    } elsif (s/^!//) {
        # deliberate shell escape: the rest of the line goes to the shell as-is
        system($_);
    } else {
        print "Unrecognized command.  Use ? for help.\n";
    }
}

########################################
########################################
########################################

########################################
# report how many entries the last search left in @result and, when the
# count is within the autoshow limit, display them
#
sub reportfound {
    local($count) = scalar(@result);

    if    ($count == 1) { print " -- 1 entry found.\n"; }
    elsif ($count == 0) { print " -- no entries found.\n"; }
    else                { print " -- $count entries found.\n"; }
    ($#result < $autoshow) && &printrecs(0, @result);
}

########################################
# write a bibliography file
#
# Writes the records in @result to $outfile in full (BibTeX) format.
# Returns 1 on success, 0 if the file cannot be opened.
#
sub writebibfile {
    local($outfile) = @_;
    local($oldmode) = $displaymode;
    local($oldfh);

    $debugging && print "writing to $outfile\n";
    # Use a separate handle and select() it instead of reopening STDOUT:
    # a failed open on STDOUT would leave the program without any output.
    open(BIBOUT, ">$outfile")
        || ((warn "Can't open $outfile: $!\n"), return 0);
    $oldfh = select(BIBOUT);
    $displaymode = 'full';
    &printrecs(0, @result);
    $displaymode = $oldmode;
    select($oldfh);
    close(BIBOUT) || warn "Error writing $outfile: $!\n";
    $| = 1;      # as before, output is unbuffered after a write
    return 1;
}

########################################
# Subject file should be in the format:
#
#compilers:
#  local/aho-hop-ull.bib   125
#  lincs.bib               130
#  parallel-compilers.bib
#math:
#  ~euler/calculus.bib     12000
#  ~/tex/bib/leibnitz.bib  50
#  math/integrals.bib      157
#
# That is, a "<subject>:" line followed by one "<file> <number of entries>"
# line per bibliography.  If the number is left off, the estimated
# time will be wrong.  The time is dependent on the "secondsperentry" variable
# for the time taken to load an entry -- this is obviously machine dependent.
########################################
# load every bibliography listed under $subject in the subject file
#
# Each file is tried relative to $bibliodir first and as given otherwise.
# Returns 1 if the subject file can't be read, 0 if the subject is unknown.
#
sub readsubject {
    local($subject) = @_;
    local($bfile, $file);

    return 1 if &loadsubjectfile();
    if ($subjlist{$subject}) {
        printf "This will take about %3.1f minutes to load %d entries.\n",
               $subjsize{$subject} * $secondsperentry / 60,
               $subjsize{$subject};
    } else {
        print "Subject: $subject cannot be found in the subject list.\n";
        print "Use 'list subjects' to see what subjects are available.\n";
        return 0;
    }
    foreach $file (split(/ /, $subjlist{$subject})) {
        next if $file eq '';
        $bfile = "$bibliodir/$file";
        if (-r $bfile) {
            &readbibfile($bfile);
        } else {
            &readbibfile($file);
        }
    }
}

########################################
# (re)read $subjectfile into
#   $subjlist{$subject} : space separated list of files
#   $subjsize{$subject} : sum of the entry counts given for those files
# Returns 0 on success, 1 if the subject file can't be opened (in which
# case the tables are left untouched).
#
sub loadsubjectfile {
    local($file, $num, $sub);

    $debugging && print "Subject file: $subjectfile\n";
    open(SFILE, $subjectfile)
        || ((warn "Can't open subject file: $!\n"), return 1);
    # undef takes a single argument, so reset each table separately
    %subjsize = ();
    %subjlist = ();
    $sub = '';
    while (<SFILE>) {
        if (/^\s*(.*):\s*$/) {
            $sub = $1;
            $debugging && print "starting to load subject $sub.\n";
            next;
        }
        next unless $sub;
        ($file, $num) = /^\s*(\S+)\s*(\d*)/;
        # blank or otherwise unparsable lines must not add an empty file name
        next unless defined($file);
        $subjsize{$sub} += $num;
        $subjlist{$sub} .= ($subjlist{$sub} eq '' ? '' : ' ') . $file;
    }
    close(SFILE);
    return 0;
}

########################################
# read one BibTeX file (or "$file.bib") and index its entries
#
# Fills %records, %fileno, %authors, %titles, %inauthors and %intitles and
# appends the file to @allfiles.  Entries whose cite key is already loaded
# are counted as duplicates and skipped.  Returns 0 if the file can't be
# opened, 1 otherwise.
#
sub readbibfile {
    local($file) = @_;
    local($num) = 0;
    local($dups) = 0;
    local($oldpipe) = $|;
    local($printfile, $prefix, $filenumber);
    local($key, $auth, $name, $word);
    local(*INFILE);

    # this little gem is from Larry Wall -- expand ~user.
    $file =~ s#^(~([a-z0-9]+))(/.*)?$#((getpwnam($2))[7]||$1).$3#e;
    # this is mine -- handle ~/file
    # (getlogin can come back empty without a controlling terminal, so
    # fall back to the name of the real uid)
    $file =~ s#^(~)(/.*)?$#((getpwnam(getlogin || (getpwuid($<))[0]))[7]||$1).$2#e;

    open(INFILE, $file) || open(INFILE, "$file.bib")
        || ((warn "Can't open $file: $!\n"), return 0);
    $| = 1;      # so the progress dots show up as they are printed

    # show files below $bibliodir as "+name"; compare as plain text so
    # regex metacharacters in the directory name can't mismatch
    $printfile = $file;
    $prefix = "$bibliodir/";
    if (substr($printfile, 0, length($prefix)) eq $prefix) {
        substr($printfile, 0, length($prefix)) = '+';
    }
    print "loading $printfile..";
    push(@allfiles, $file);
    $filenumber = $#allfiles;

    while (!eof(INFILE)) {
        $key = &bibtexread(*INFILE);      # fills the global %entry
        if ($key && !$records{$key}) {
            $num++;
            ($num % 50) || print ".";
            $records{$key} = $entry{FULL};
            $fileno{$key}  = $filenumber;
            $authors{$key} = $entry{author};
            if ($entry{booktitle}) {
                if ($entry{title}) {
                    $titles{$key} = $entry{title} . ' in ' . $entry{booktitle};
                } else {
                    $titles{$key} = $entry{booktitle};
                }
            } else {
                $titles{$key} = $entry{title};
            }
            foreach $auth (split(/ and /, $entry{author})) {
                $name = &parsename($auth);
                $name =~ tr/A-Za-z0-9\-//cd;   # delete non-alphanumerics
                $name =~ tr/A-Z/a-z/;          # everything lowercase
                $inauthors{$name} .= $; . $key;
            }
            foreach $word (split(/\s+/, $titles{$key})) {
                $word =~ tr/A-Za-z0-9\-//cd;
                $word =~ tr/A-Z/a-z/;
                # Compare whole words: a bare substring test would also
                # drop words such as "rom", "or" or "it" that merely occur
                # inside one of the ignore words.
                if ($word && (index(" $ignorewords ", " $word ") == -1)) {
                    if ($worddebug) { print "$word\n"; }
                    $intitles{$word} .= $; . $key;
                }
            }
        } elsif ($key) {
            $debugging && print "Duplicate cite key: not adding $key\n";
            $dups++;
        }
    }
    close(INFILE);
    $| = $oldpipe;
    print "$num entries.";
    if ($dups) {
        print " $dups duplicate cite keys.\n";
    } else {
        print "\n";
    }
    return 1;
}

########################################
# lookbib style search: @result becomes the cite keys of all records whose
# full text matches every whitespace separated regex in $words
#
sub handlelookbib {
    local($words) = @_;
    local($w, $cite);
    local(%resaar);
    local(@res) = keys(%records);
    local(@res2);

    print "lookbib: \"$words\"";
    foreach $w (split(/\s+/, $words)) {
        $debugging > 1 && print "hlb: word is $w\n";
        # an empty pattern would silently reuse the last successful regex
        next if $w eq '';
        eval '/$w/i';                     # make sure the regex compiles
        $@ && do { warn $@; next; };
        @res2 = ();
        foreach $cite (@res) {
            # Match against the pattern itself.  The empty-pattern shortcut
            # (//) reuses the last *successful* match, which is the wrong
            # pattern whenever /$w/ did not happen to match $_.
            ($records{$cite} =~ /$w/i) && push(@res2, $cite);
        }
        $debugging && print "hlb: ", $#res2+1, " found so far\n";
        @res = @res2;
    }
    @result = @res;
    # weed out any duplicates that might have cropped up
    @result = grep($resaar{$_}++ == 0, @result);
    $sortoutput && (@result = &titlesort(@result));
}

########################################
# field search: "<field>=<value>" or "<field>=/<regex>"
#
# "a" and "t" abbreviate author and title.  Without the leading slash the
# value is looked up in the author/title word indexes; with it the value is
# used as a case-insensitive regex ("all" searches the whole record).
# Leaves the matching cite keys in @result.
#
sub handlesearch {
    local($_) = @_;
    local($afield, $lfield, $lvalue, $cite, $val);
    local(%resaar);
    local(%atolar) = ('a', 'author',
                      't', 'title');

    @result = ();
    if (($afield, $lvalue) = /^(\w)\s*=\s*(.*)$/) {
        $afield =~ tr/A-Z/a-z/;
        $lfield = $atolar{$afield};
        if (!$lfield) {
            print "No abbreviation for $afield. Spell out the field name.\n";
            return;
        }
    } else {
        ($lfield, $lvalue) = /^(\w+)\s*=\s*(.*)$/;
        $lfield =~ tr/A-Z/a-z/;
    }
    $debugging > 1 && print "handlesearch> lfield: $lfield, lvalue: $lvalue\n";

    if (substr($lvalue, 0, 1) eq '/') {     # regex search
        substr($lvalue, 0, 1) = '';
        print "$lfield is /$lvalue/";
        # An empty pattern means "last successful regex" to perl, not
        # "match everything"; use an explicit match-all instead.
        $lvalue = '^' if $lvalue eq '';
        # validate once up front, as handlelookbib does, so a typo in the
        # regex can't kill the program
        eval '/$lvalue/i';
        if ($@) {
            print "\n";
            warn $@;
            return;
        }
        if ($lfield eq 'author') {
            while (($cite, $val) = each %authors) {
                ($val =~ /$lvalue/i) && push(@result, $cite);
            }
        } elsif ($lfield eq 'title') {
            while (($cite, $val) = each %titles) {
                ($val =~ /$lvalue/i) && push(@result, $cite);
            }
        } elsif ($lfield eq 'all') {
            while (($cite, $val) = each %records) {
                ($val =~ /$lvalue/i) && push(@result, $cite);
            }
        } else {
            # long search.  This is wrong.  Need to have each field.
            while (($cite, $val) = each %records) {
                # cheap test on the whole record before splitting out fields
                next unless ($val =~ /$lvalue/i);
                (&retfield($val, $lfield) =~ /$lvalue/i)
                    && push(@result, $cite);
            }
        }
    } else {                                # exact search
        print "$lfield is $lvalue";
        # normalize the same way readbibfile normalizes the index keys
        $lvalue =~ tr/A-Za-z0-9\-//cd;
        $lvalue =~ tr/A-Z/a-z/;
        if ($lfield =~ /^author$/) {
            @result = split(/$;/, $inauthors{$lvalue});
        } elsif ($lfield =~ /^title$/) {
            @result = split(/$;/, $intitles{$lvalue});
        } else {
            print " -- Exact matching on $lfield not available.\n";
            return;
        }
        shift @result;      # index strings start with $;, so field 0 is empty
    }
    # weed out any duplicates that might have cropped up
    @result = grep($resaar{$_}++ == 0, @result);
    $sortoutput && (@result = &titlesort(@result));
}

########################################
# sort comparison: order cite keys by their title
sub bytitlefield {
    $titles{$a} cmp $titles{$b};
}

# return the given cite keys sorted by title
sub titlesort {
    return(sort bytitlefield @_);
}

########################################
# print records: the first argument is the number of the entry preceding
# the first one printed, the rest are cite keys.  Full mode prints the
# verbatim record (preceded by its file name when showfileno is set),
# brief mode prints one numbered line of last names and title.
#
sub printrecs {
    local($cite, $auth, $names);
    local($num) = shift(@_);

    foreach $cite (@_) {
        next if (!$cite);
        $num++;
        $debugging > 2 && print "printrecs> cite: $cite\n";
        if ($displaymode eq 'full') {
            $showfileno && print "\% $allfiles[$fileno{$cite}]:\n";
            print $records{$cite}, "\n";
        } else {
            $names = '';
            foreach $auth (split(/ and /, $authors{$cite})) {
                $names .= ', ' . &parsename($auth);
            }
            $names =~ s/^, //;
            $debugging > 3 && print "printrecs> :$num:$names:$titles{$cite}:\n";
            printf "%3d %-20.20s %-50.50s\n", $num, $names, $titles{$cite};
        }
    }
}

########################################
# sets @result to a list of citekeys of duplicates
#
# Two entries count as duplicates when they share an author last name,
# have the same entry type and the same title (ignoring case).
#
sub finddups {
    local($curtitle);
    local($cite, $ocite, $name);
    local($type, $otype, $otitle);
    local(@auths, @restauths);
    local(%resaar);

    @result = ();
    foreach $name (keys %inauthors) {
        $debugging && print "$name:";
        @auths = split(/$;/, $inauthors{$name});
        shift @auths;
        @restauths = @auths;
        foreach $cite (@auths) {
            $curtitle = $titles{$cite};
            $curtitle =~ tr/A-Z/a-z/;
            ($type) = ($records{$cite} =~ /^\s*\@\s*(\w+)/);
            $type =~ tr/A-Z/a-z/;
            # only compare with the keys after this one, so that every
            # pair is examined once
            shift(@restauths);
            foreach $ocite (@restauths) {
                next if $cite eq $ocite;
                ($otype) = ($records{$ocite} =~ /^\s*\@\s*(\w+)/);
                $otype =~ tr/A-Z/a-z/;
                next if $type ne $otype;
                $otitle = $titles{$ocite};
                $otitle =~ tr/A-Z/a-z/;
                next if $curtitle ne $otitle;
                # author in common, same type, and same title
                push(@result, $cite, $ocite);
            }
        }
    }
    $debugging && print "\n";
    # weed out any duplicates that might have cropped up
    @result = grep($resaar{$_}++ == 0, @result);
    if (@result == 1) {
        print "1 duplicate found.\n";
    } elsif (@result == 0) {
        print "no duplicates found.\n";
    } else {
        print scalar(@result), " duplicates found.\n";
    }
}

########################################
#
# parsename takes a name in BibTeX format, and parses it into
# parts.  It returns the last name.
# The following globals are set by parsename:
#   $First, $von, $Last, $Jr
#
# Recognized forms are "First von Last", "von Last, First" and
# "von Last, Jr, First".  A brace-quoted group is kept together as a
# single name.  "others" and "et al" give the last name "others".
sub parsename {
    local($name) = @_;
    local($part);
    local($p1, $p2, $p3);
    local($sing, $dummy);

    # Set one whitespace-delimited {...} group aside behind a placeholder
    # so that the blanks and commas inside it can't split the name.
    ($dummy, $sing, $dummy) = $name =~ /(^|\s)\{(.*)\}(\s|$)/;
    $name =~ s/(^|\s)\{(.*)\}(\s|$)/${1}ASingleNameString$3/;

    $First = $von = $Last = $Jr = '';
    ($p1, $p2, $p3) = split(/,/, $name, 3);

    if ($p3 || $p2) {
        # "von Last, Jr, First" or "von Last, First"
        if ($p3) {
            $First = $p3;
            $Jr = $p2;
        } else {
            $First = $p2;
        }
        if ($p1 =~ s/^\s*\{(.*)\}\s*$/$1/) {
            # whole part braced: it is the last name as it stands
        } else {
            # leading lowercase words form the von part
            while ($p1 =~ /^[a-z]/) {
                ($part) = $p1 =~ /^(\S+)/;
                $p1 =~ s/^(\S+)\s*//;
                $von .= ' ' . $part;
            }
        }
        $Last = $p1;
    } else {
        # "First von Last"
        if ($p1 =~ s/^\s*\{(.*)\}\s*$/$1/) {
            $Last = $p1;
        } else {
            # leading capitalized words are first names ...
            while ($p1 =~ /^[A-Z]/) {
                ($part) = $p1 =~ /^(\S+)/;
                $p1 =~ s/^(\S+)\s*//;
                $First .= ' ' . $part;
            }
            # ... followed by the lowercase von part ...
            while ($p1 =~ /^[a-z]/) {
                ($part) = $p1 =~ /^(\S+)/;
                $p1 =~ s/^(\S+)\s*//;
                $von .= ' ' . $part;
            }
            # ... and what is left is the last name.  If nothing is left,
            # the final "first" name was really the last name.
            if ($p1) {
                $Last = $p1;
            } else {
                ($Last) = $First =~ /\s+(\S+)\s*$/;
                $First =~ s/\s+\S+\s*$//;
            }
        }
    }

    # put the braced group back, whichever part it ended up in
    $Last  =~ s/ASingleNameString/$sing/;
    $First =~ s/ASingleNameString/$sing/;
    $von   =~ s/ASingleNameString/$sing/;
    $Jr    =~ s/ASingleNameString/$sing/;

    $Last  =~ s/^\s+//;
    $von   =~ s/^\s+//;
    $First =~ s/^\s+//;
    $Jr    =~ s/^\s+//;

    # handle "et al" or "others"
    if ( (!$Last) && ($von =~ /^(others|et\.?\s*al)\.?$/i) ) {
        $Last = "others";
        $von = '';
    }

    # A name written entirely in lowercase ("donald knuth",
    # "de la fontaine, Jean") has been swallowed by the von loop.  The final
    # word is the last name, so move it out of the von part.
    if ( (!$Last) && ($von ne '') ) {
        if ($von =~ s/\s*(\S+)$//) {
            $Last = $1;
        }
    }

    if ($debugging > 3) {
        $name =~ s/ASingleNameString/$sing/;
        $name =~ s/^\s+//;
        printf "parsename> %20s: %-15s %-6s %-15s %-6s\n",
               $name, $First, $von, $Last, $Jr;
    }

    return ($Last);
}

########################################
#
# should do:
#    split out the field parsing into a separate routine.
#    so we read in verbatim, then call &bibtexexplode to separate
#    into %entry.
#
# bibtexread reads the next entry from the filehandle passed as a typeglob
# and explodes it into the global %entry:
#   $entry{type}, $entry{citekey} : from the "@type{citekey," header
#   $entry{FULL}                  : the entry as read
#   $entry{<field>}               : each field (name lowercased), with one
#                                   level of {} or "" delimiters removed
# Returns the cite key, or 0 for @string/@preamble/@comment, at end of
# file, and when the header line can't be parsed.
#
sub bibtexread {
    local(*FILE) = @_;
    local($braces) = 1;          # the header's opening brace is already open
    local($ent, $delim);
    local($field, $value, @values);

    %entry = ();

    # skip ahead to the next line that starts an entry
    while (<FILE>) {
        last if /^\s*\@/;
    }
    if (/^\s*\@\s*(string|preamble|comment)/i) {
        $debugging > 2 && print "::$_\n";
        return 0;
    }
    if (eof(FILE)) {
        $debugging > 2 && print "::End of File::\n";
        return 0;
    }
    if (!/,/) {    # preamble is split on multiple lines
        $ent = $_;
        while (<FILE>) {
            $ent .= $_;
            last if /,/;
        }
        $_ = $ent;
    }
    $ent = $_;
    ( ($entry{type}, $delim, $entry{citekey}) =
          /^\s*\@\s*(\w+)\s*([{(])\s*(\S+)\s*,\s*$/ )
        || do { print "Error getting line: $_\n"; return 0; };
    $debugging > 2
        && print "btread:", $entry{type}, $delim, $entry{citekey}, "\n";

    if ($delim eq '{') {
        # read until the brace opened by the header is closed again
        while (<FILE>) {
            $ent .= $_;
            $braces += tr/{//;       # tr with an empty replacement only counts
            $braces -= tr/}//;
            last if ($braces <= 0);
        }
        $entry{FULL} = $ent;
        $ent =~ s/\}\s*$//;
    } else {
        # @type( ... ) entries end at the first line ending in ")"
        while (<FILE>) {
            $ent .= $_;
            last if $ent =~ s/[)]\s*$//;
        }
        $entry{FULL} = $ent . ')';
    }

    $ent =~ s/\s+/ /g;
    @values = split(/,\s*(\w+)\s*=\s*/, $ent);
    $debugging > 2 && print "btread: all values: ", join("//", @values), "\n";
    shift(@values);    # zap the beginning info
    while (@values) {
        $field = shift(@values);
        $field =~ tr/A-Z/a-z/;
        $value = shift(@values);
        # unquoted values (numbers, macros) would otherwise keep the blank
        # that separated them from the end of the entry
        $value =~ s/^\s+//;
        $value =~ s/\s+$//;
        $value =~ s/^\{(.*)\}$/$1/;
        $value =~ s/^"(.*)"$/$1/;
        $entry{$field} = $value;
    }

    return($entry{citekey});
}

#
# retfield returns the raw text of field $fld (a lowercase field name) from
# the verbatim entry $ent, or the empty string if the entry has no such
# field.  The value is not stripped of its delimiters.
#
sub retfield {
    local($ent, $fld) = @_;
    local(@vals) = split(/,\s*(\w+)\s*=\s*/, $ent);
    local($field, $value);

    shift(@vals);      # drop the "@type{citekey" part
    while (@vals) {
        $field = shift(@vals);
        $field =~ tr/A-Z/a-z/;
        $value = shift(@vals);
        return $value if $field eq $fld;
    }
    # Empty string rather than 0: callers match the result against a regex,
    # and "0" would match any pattern that accepts a zero.
    return '';
}