#!/usr/bin/perl

# This is the output format.  Next are the three recognized old
# formats to be converted into this output format.

# Output columns.  A field's position in this list is the index of
# its column in every converted row.
my @F = (
    # sort field
    'citekey',

    # basic fields
    'author',
    'year',
    'title',

    # title fields
    'booktitle',
    'journal',
    'series',
    'crossref',

    # publisher information
    'publisher',
    'school',
    'organization',
    'institution',
    'howpublished',
    'address',

    # other author fields
    'editor',
    'translator',       # I added this for the future

    # other time fields
    'edition',
    'firstedition',     # This is the year book was first published

    # citation information
    'volume',
    'number',
    'month',
    'chapter',
    'pages',

    # meta fields
    'entrytype',        # This is the @ name in bibtex format
    'type',
    'key',

    # note fields
    'annote',
    'note',

    # my extra fields
    'category',
    'got',
);

# Reverse lookup: output field name => its index in @F.
my %F;
@F{@F} = (0 .. $#F);

# Column layout of the 10-field "book" input format, in input order.
# Entries that name an output field are copied straight across; the
# pseudo-names are handled specially by book():
#   _YEAR1_, _YEAR2_  the two year columns (see book() for how they
#                     are assigned to 'year' and 'firstedition')
#   _EMPTY_           column must be the empty string
my @book = (
    'author',           # 1 Author
    'title',            # 2 Title
    'publisher',        # 3 Publisher
    '_YEAR1_',          # 4 Year
    '_YEAR2_',          # 5 x
    'edition',          # 6 Edition
    'category',         # 7 Category
    'got',              # 8 Got
    '_EMPTY_',          # 9 Note
    'note',             # 10 LongNote
);

# book(@fields) - convert one row of the "book" format.
#
# Takes the tab-split columns of one input line and returns a list
# with one slot per entry of @F.  The numerically larger of the two
# year columns becomes 'year' and the other becomes 'firstedition';
# 'entrytype' is always 'book'.  The cite key is built from the first
# column (author) and the first-edition year when that is a true
# value, otherwise the publication year.
#
# Dies when the number of columns differs from @book, when an _EMPTY_
# column holds text, or when a template name is not an output field.
sub book {
    my @in = @_;
    die "Wrong number of fields:\n" . join("::", @in)
        unless $#in == $#book;

    my @out = ();
    $out[$#F] = '';     # pre-extend: one slot per output column

    my ($year, $first_year);
    for my $i (0 .. $#in) {
        my $slot = $book[$i];

        if ($slot eq '_EMPTY_') {
            die "Field $i not empty:\n" . join("\n", @in)
                if $in[$i] ne '';
            next;
        }
        if ($slot eq '_YEAR1_') {
            $year = $in[$i];
            next;
        }
        if ($slot eq '_YEAR2_') {
            $first_year = $in[$i];
            next;
        }

        my $j = $F{$slot};
        die "Field $i not recognized:\n" . join("\n", @in)
            unless defined $j;
        $out[$j] = $in[$i];
    }

    # Keep the later year in $year and the earlier one in $first_year.
    my $swap = defined $first_year
        && (!defined $year || $first_year > $year);
    ($year, $first_year) = ($first_year, $year) if $swap;

    $out[$F{year}]         = $year;
    $out[$F{firstedition}] = $first_year;
    $out[$F{entrytype}]    = 'book';
    $out[$F{citekey}]      = citekey($in[0], ($first_year || $year));
    return @out;
}

# Column layout of the 20-field "aibib" input format, in input order.
# Entries that name an output field are copied straight across; the
# pseudo-names are handled specially by aibib():
#   _EMPTY_             column must be the empty string
#   _IGNORE_            column is dropped
#   _TITLE1_, _TITLE2_  two halves of the title, joined with a space
#   _SRC_               free-text source; decides the entry type and
#                       which output field(s) receive it
my @aibib =
    (
     '_EMPTY_',			# 1 entry
     '_IGNORE_',		# 2 key
     'year',			# 3 year
     'category',		# 4 cat
     'author',			# 5 author
     '_TITLE1_',		# 6 title1
     '_TITLE2_',		# 7 title2
     '_SRC_',			# 8 source
     '_IGNORE_',		# 9 srctype
     'pages',			# 10 pp
     'volume',			# 11 vol
     'number',			# 12 no
     '_EMPTY_',			# 13 type
     'month',			# 14 mo
     'editor',			# 15 editor
     'publisher',		# 16 publisher
     'address',			# 17 address
     'note',			# 18 note
     '_EMPTY_',			# 19 annote
     '_EMPTY_'			# 20 longnote
     );

# aibib(@fields) - convert one row of the "aibib" format.
#
# Takes the tab-split columns of one input line and returns a list
# with one slot per entry of @F.  The source column is classified by
# keyword:
#   proceedings / proc. / conference / ijcai / symposium / workshop
#       -> entrytype 'inproceedings', source stored in 'booktitle'
#   memo / technical report / working paper
#       -> entrytype 'techreport', source split into 'institution',
#          'type' and 'number'
#   anything else
#       -> entrytype 'article', source stored in 'journal'
# The cite key is built from the converted author and year.
#
# Dies when the number of columns differs from @aibib, when an
# _EMPTY_ column holds text, when a template name is not an output
# field, or when a tech-report source matches none of the known
# "<institution> <type> <number>" patterns.
sub aibib {
    my @in = @_;
    if ($#in != $#aibib) {
        die "Wrong number of fields:\n" . join("::", @in);
    }
    my @out = ();
    $out[$#F] = '';     # pre-extend: one slot per output column
    my $title;

    for my $i (0 .. $#in) {
        my $slot  = $aibib[$i];
        my $value = $in[$i];

        if ($slot eq '_EMPTY_') {
            die "Field $i not empty:\n" . join("\n", @in)
                if $value ne '';
        } elsif ($slot eq '_IGNORE_') {
            # deliberately dropped
        } elsif ($slot eq '_TITLE1_') {
            $title = $value;
        } elsif ($slot eq '_TITLE2_') {
            $title .= " " . $value;
            # /g is required: without it only the first run of
            # whitespace would be collapsed.
            $title =~ s/\s+/ /g;
            $title =~ s/^ //;
            $title =~ s/ $//;
            $out[$F{title}] = $title;
        } elsif ($slot eq '_SRC_') {
            my $src = $value;
            if ($src =~ /\b(?:proceedings|conference|ijcai|symposium|workshop)\b/i
                or $src =~ /\bproc\./i) {
                $out[$F{entrytype}] = 'inproceedings';
                $out[$F{booktitle}] = $src;
            } elsif ($src =~ /\b(?:memo|technical report|working paper)\b/i) {
                $out[$F{entrytype}] = 'techreport';
                if ($src =~ /^MIT AI Memo (\S+)$/) {
                    $out[$F{institution}] = "MIT";
                    $out[$F{type}]        = "AI Memo";
                    $out[$F{number}]      = $1;
                } elsif ($src =~ /^MCC Technical Report (No\.|Number) (\S.*)$/) {
                    # (\S.*) rather than (\S.+) so that a report
                    # number of a single character is accepted too.
                    $out[$F{institution}] = "MCC";
                    $out[$F{type}]        = "Technical Report";
                    $out[$F{number}]      = $2;
                } elsif ($src =~ /^MIT AI Working Paper (\S+)$/) {
                    $out[$F{institution}] = "MIT";
                    $out[$F{type}]        = "AI Working Paper";
                    $out[$F{number}]      = $1;
                } elsif ($src =~ /^(\S+) Technical Report (\S+)$/i) {
                    $out[$F{institution}] = "$1";
                    $out[$F{type}]        = "Technical Report";
                    $out[$F{number}]      = $2;
                } else {
                    die "Cannot decide: $src";
                }
            } else {
                $out[$F{entrytype}] = 'article';
                $out[$F{journal}]   = $src;
            }
        } else {
            my $j = $F{$slot};
            die "Field $i not recognized:\n" . join("\n", @in)
                if not defined $j;
            $out[$j] = $value;
        }
    }
    $out[$F{citekey}] = citekey($out[$F{author}], $out[$F{year}]);
    return @out;
}

# Column layout of the 24-field "nlpbib" input format, in input order.
# Entries that name an output field are copied straight across; the
# pseudo-names are handled specially by nlpbib():
#   _EMPTY_             column must be the empty string
#   _IGNORE_            column is dropped
#   _TITLE1_, _TITLE2_  two halves of the title, joined with a space
#   _SRC_               source text, remembered until _SRCTYPE_ is seen
#   _SRCTYPE_           numeric code choosing the field for the source
my @nlpbib =
    (
     'author',			# 1 author
     '_TITLE1_',		# 2 title1
     '_TITLE2_',		# 3 title2
     '_SRC_',			# 4 source
     'type',			# 5 type
     '_SRCTYPE_',		# 6 srctype
     'entrytype',		# 7 entry
     '_IGNORE_',		# 8 label
     'year',			# 9 yr
     'pages',			# 10 pp
     'got',			# 11 x
     'volume',			# 12 vol
     'number',			# 13 no
     'month',			# 14 mo
     'chapter',			# 15 ch
     '_EMPTY_',			# 16 ed
     'editor',			# 17 editor
     'publisher',		# 18 publisher
     'address',			# 19 address
     'series',			# 20 series
     '_EMPTY_',			# 21 key
     'organization',		# 22 org
     'note',			# 23 note
     '_EMPTY_',			# 24 longnote
     );

# nlpbib(@fields) - convert one row of the "nlpbib" format.
#
# Takes the tab-split columns of one input line and returns a list
# with one slot per entry of @F.  The source text is stored according
# to the numeric source-type code:
#   1 -> 'journal'   2 -> 'booktitle'   3 -> 'institution'
#   4 -> 'school'    anything else -> 'howpublished'
# The cite key is built from the converted author and year.
#
# Dies when the number of columns differs from @nlpbib, when an
# _EMPTY_ column holds text, or when a template name is not an output
# field.
sub nlpbib {
    my @in = @_;
    if ($#in != $#nlpbib) {
        die "Wrong number of fields:\n" . join("::", @in);
    }
    my @out = ();
    $out[$#F] = '';     # pre-extend: one slot per output column
    my ($title, $src);

    for my $i (0 .. $#in) {
        my $slot  = $nlpbib[$i];
        my $value = $in[$i];

        if ($slot eq '_EMPTY_') {
            die "Field $i not empty:\n" . join("\n", @in)
                if $value ne '';
        } elsif ($slot eq '_IGNORE_') {
            # deliberately dropped
        } elsif ($slot eq '_TITLE1_') {
            $title = $value;
        } elsif ($slot eq '_TITLE2_') {
            $title .= " " . $value;
            # /g is required: without it only the first run of
            # whitespace would be collapsed.
            $title =~ s/\s+/ /g;
            $title =~ s/^ //;
            $title =~ s/ $//;
            $out[$F{title}] = $title;
        } elsif ($slot eq '_SRC_') {
            # The template lists _SRC_ before _SRCTYPE_, so the text
            # is available by the time the code is processed.
            $src = $value;
        } elsif ($slot eq '_SRCTYPE_') {
            # Numeric comparison on purpose, as in the input data the
            # code is a number; non-numeric text compares as 0.
            if ($value == 1) {
                $out[$F{journal}] = $src;
            } elsif ($value == 2) {
                $out[$F{booktitle}] = $src;
            } elsif ($value == 3) {
                $out[$F{institution}] = $src;
            } elsif ($value == 4) {
                $out[$F{school}] = $src;
            } else {
                $out[$F{howpublished}] = $src;
            }
        } else {
            my $j = $F{$slot};
            die "Field $i not recognized:\n" . join("\n", @in)
                if not defined $j;
            $out[$j] = $value;
        }
    }
    $out[$F{citekey}] = citekey($out[$F{author}], $out[$F{year}]);
    return @out;
}


# citekey($author, $year) - build a unique output cite key.
#
# The key is the author string with leading non-word characters
# stripped and everything from the next non-word character onwards
# removed, followed by the last two digits of the year when the year
# is exactly four digits, or "00" otherwise.  If that key has already
# been handed out, the first unused suffix letter from 'a' to 'z' is
# appended.  Dies when all 26 suffixes are taken as well.

# Keys issued so far (key => 1).
my %citekey;

sub citekey {
    my ($author, $year) = @_;

    my $yy = ($year =~ /^\d\d(\d\d)$/) ? $1 : "00";

    (my $stem = $author) =~ s/^\W+//;
    $stem =~ s/\W.*//;

    my $key = $stem . $yy;
    if (defined $citekey{$key}) {
        # Base key is taken: pick the first free lettered variant.
        my ($free) = grep { not defined $citekey{$_} }
                     map  { $key . $_ } 'a' .. 'z';
        die "Cannot build citekey: $author, $year\n"
            unless defined $free;
        $key = $free;
    }
    $citekey{$key} = 1;
    return $key;
}

# Driver: read tab-separated rows from the files named on the command
# line (or standard input), detect the input format from the number
# of columns in the first row, and print every row converted to the
# output format, one tab-separated line per row.
#
# Dies with "Unknown format" when the first row has a column count
# other than 10, 20 or 24 (this includes empty input).

# Column count of the first row => converter for the whole input.
my %converter_for = (
    10 => \&book,
    20 => \&aibib,
    24 => \&nlpbib,
);

my $first = <>;
chomp($first) if defined $first;
my @row = defined $first ? split(/\t/, $first, -1) : ();

my $convert = $converter_for{ scalar(@row) }
    or die "Unknown format";

print join("\t", $convert->(@row)) . "\n";
while (my $line = <>) {
    # chomp, not chop: a final line without a trailing newline must
    # not lose its last real character.
    chomp $line;
    @row = split(/\t/, $line, -1);
    print join("\t", $convert->(@row)) . "\n";
}

