#!/usr/bin/perl -w
use strict;

# Read document paths, one per line, from the files named on the command
# line (or STDIN) and print a BibTeX @Article stub for every PDF, PS or
# gzipped PS file.  The title is guessed from the first plausible line of
# the extracted text; the author is the text before the first "/" in the
# path; the citation key is derived from the base name of the file.
while (my $line = <>) {
    chomp $line;
    my $file = $line;
    $file =~ s/^\.\///;    # drop a leading "./"

    # Pick the text extractor from the extension.  The command is kept as an
    # argument list so the file name is never parsed by a shell: names
    # containing quotes or other metacharacters are passed through verbatim.
    my @cmd;
    my $ext;
    if ($file =~ /\.pdf$/i) {
        @cmd = ('pdftotext', $file, '-');
        $ext = "PDF";
    } elsif ($file =~ /\.ps$/i) {
        @cmd = ('ps2ascii', $file);
        $ext = "PS";
    } elsif ($file =~ /\.ps\.gz$/i) {
        # The pipeline needs a shell, but the name is handed over as the
        # positional parameter $1 instead of being pasted into the script.
        @cmd = ('sh', '-c', 'zcat "$1" | ps2ascii', 'sh', $file);
        $ext = "PS";
    } else {
        warn "Unknown extension $file\n";
        next;
    }

    my $fh;
    unless (open($fh, '-|', @cmd)) {
        warn "Cannot open $file\n";
        next;
    }

    # First non-empty line that survives the filters below is probably the
    # title.  The order of the checks matters: the "last" tests give up on
    # the whole document, the "next" tests only reject the current line.
    my $title;
    while (my $text = <$fh>) {
        next if $text =~ /^\s*$/;         # blank line
        last if non_ascii($text);         # mostly symbols: give up
        last if $text =~ /^Error:/;       # line starts with "Error:": give up
        last if length($text) > 256;      # implausibly long line: give up
        next if $text =~ /\d+-\d+/;       # digit ranges (presumably pages/dates)
        next if $text =~ /\(\d+\)/;       # parenthesised number
        next if $text =~ /^\S+$/;         # a single word
        next if $text =~ /\./;            # contains a period
        next if length($text) < 16;       # too short (length includes newline)
        next if non_alpha($text);         # fewer than 80% letters
        next unless $text =~ /^[A-Z]/;    # must start with a capital letter
        next if $text =~ /cmp-lg/;
        next if $text =~ /Submitted/;
        next if $text =~ /To appear/i;
        next if $text =~ /Page/i;
        next if $text =~ /Chapter/i;
        next if $text =~ /Press/i;
        chomp $text;
        $title = $text;
        last;
    }
    # Closing early may make the extractor exit abnormally; that is expected
    # here, so the close status is deliberately ignored.
    close($fh);

    unless ($title) {
        warn "No text in $file\n";
        $title = $file;
    }

    # A bare double quote would end the quote-delimited BibTeX field early;
    # BibTeX accepts it when it is wrapped in braces.
    (my $bib_title = $title) =~ s/"/{"}/g;

    # Author: everything before the first "/" of the path, capitalised.
    my $author = $file;
    $author =~ s/\/.*//;
    $author = ucfirst($author);

    # Key: base name up to its first ".", with non-word characters
    # replaced by "_".
    my $key = $file;
    $key =~ s/.*\///;
    $key =~ s/\..*//;
    $key =~ s/[^\w]/_/g;

    print qq{\@Article\{$key,\n\tAUTHOR = "$author",\n\tTITLE = "$bib_title",\n\t$ext = "/ref/$file"\n\}\n};
}

# Report whether a string is dominated by symbol characters.
# Despite the name, this does not look for non-ASCII bytes: it counts the
# characters that are neither word characters (\w) nor whitespace (\s) and
# returns true when they make up more than 20% of the string's length.
sub non_ascii {
    my ($text) = @_;
    # Assigning the match list to () and then to a scalar yields its size.
    my $symbols = () = $text =~ /[^\w\s]/g;
    return $symbols > length($text) * 0.2;
}

# Report whether a string has too few letters.
# Counts the characters matching [a-z] case-insensitively and returns true
# when they account for less than 80% of the string's length.
sub non_alpha {
    my ($text) = @_;
    my $letters = 0;
    # Each scalar-context /g match advances past one letter.
    $letters++ while $text =~ /[a-z]/gi;
    return $letters < 0.8 * length($text);
}
