#!/usr/bin/perl -w

use DBI;
use strict;

# Connect to the MySQL database "categorize" as user "eanders"; no password
# argument is passed.  DBI reports connection failures through $DBI::errstr,
# not through the OS error variable $!, so that is what the message shows.
my $dbh = DBI->connect("DBI:mysql:categorize","eanders")
    or die "Noconnect: $DBI::errstr\n";

# Two-level category taxonomy: each key is a top-level category and its value
# is a reference to the list of child categories that roll up into it.
my %cats = (
    'Software Installation' => [
        'Software Packaging',
        'OS Installation',
        'Application Installation',
        'User Customization',
    ],
    'Services' => [
        'Backup', 'Database', 'DNS', 'Mail',
        'News', 'NFS', 'Printing', 'Web',
    ],
    'Configuration Management' => [
        'Site Configuration',
        'Host Configuration',
        'Site Move',
        'Fault Tolerance',
    ],
    'Monitoring' => [
        'Configuration Discovery',
        'System Monitoring',
        'Network Monitoring',
        'Data Display',
        'Host Monitoring',
        'Resource Accounting',
        'Benchmarking',
        'Performance Tuning',
    ],
    'Network' => [
        'LAN',
        'WAN',
        'Network Configuration',
        'Host Tables',
    ],
    'Tools' => [
        'General Tool',
        'File Synchronization',
        'Trouble Tickets',
        'Secure Root Access',
        'Security',
        'Remote Access',
        'File Migration',
        'Resource Cleanup',
    ],
    'Administrator Improvement' => [
        'Self Improvement',
        'Software Design',
        'Models',
        'Training Administrators',
    ],
    'User Management' => [
        'Documentation',
        'User Interaction',
        'Policy',
        'Accounts',
        'White Pages',
    ],
);

# Lookup tables used by the rest of the script:
#   %parent - child category name => name of its top-level category
#   %count  - task name           => number of papers tallied for it
#   %skip   - task names seen in the data that have no entry in %parent
my (%parent, %count, %skip);

# Invert %cats so each child category can be mapped back to its group.
foreach my $group (keys %cats) {
    $parent{$_} = $group foreach @{ $cats{$group} };
}

# Fetch, for every distinct task_type value among papers whose pubref starts
# with "LISA", the number of papers carrying that value.  DBI failures are
# reported through the handle's errstr (the OS error variable $! is not set
# by DBI).
my $sth = $dbh->prepare(q{select task_type, count(*) from papers where pubref like 'LISA%' group by task_type})
    or die "Error: " . $dbh->errstr . "\n";
$sth->execute
    or die "Error2: " . $sth->errstr;

# A task_type value may name several tasks separated by commas; each listed
# task is credited with the full paper count of its row.
my $all = $sth->fetchall_arrayref;
foreach my $row (@$all) {
    my ($task_m, $count) = @$row;

    # A NULL task_type arrives as undef: there is nothing to split, and
    # passing it to split would only produce an "uninitialized" warning.
    next unless defined $task_m;

    foreach my $task (split(/,\s*/, $task_m)) {
        $count{$task} += $count;
        # Remember tasks that are not part of the known taxonomy.
        $skip{$task} = 1 unless defined $parent{$task};
    }
}

# Diagnostic: number of distinct task names seen in the query results.
print STDERR "# categories = ", scalar keys %count, "\n";

# Count the tasks that have no parent category.  Start from 0 so the total
# is a defined number even when nothing was skipped.  Tasks without a parent
# that nevertheless have more than one paper are called out on STDERR;
# sorting the names keeps those warnings in a stable order between runs.
my $skipcount = 0;
foreach my $task (sort keys %skip) {
    warn "task $task has $count{$task} entries, but no parent.\n"
        if $count{$task} > 1;
    ++$skipcount;
}
# %pcount: top-level category name => sum of the paper counts of its children.
my %pcount;
foreach my $parent (keys %cats) {
    my $total = 0;
    foreach my $child (@{ $cats{$parent} }) {
        # A child category with no papers has no %count entry; treat it as 0
        # rather than adding undef (which warns under -w).
        $total += $count{$child} || 0;
    }
    $pcount{$parent} = $total;
}

# Print the report: top-level categories ordered by descending total, each
# followed by its children ordered by descending paper count.  Ties are
# broken by ascending name, which is the same order the previous
# "reverse sort" with an inverted tie-break produced.
foreach my $parent (sort { $pcount{$b} <=> $pcount{$a} || $a cmp $b } keys %pcount) {
    print "* $parent [$pcount{$parent}]\n";

    # Children with no papers have no %count entry; show and sort them as 0
    # instead of comparing and interpolating undef.
    my @children = sort {
        ($count{$b} || 0) <=> ($count{$a} || 0) || $a cmp $b
    } @{ $cats{$parent} };

    foreach my $child (@children) {
        my $papers = $count{$child} || 0;
        print "  . $child [$papers]\n";
    }
}

# Number of tasks that had no parent category; print 0 rather than an
# undefined value when none were skipped.
my $skipped = $skipcount || 0;
print "* Only one paper on topic [$skipped]\n";

$dbh->disconnect;
