#!/usr/bin/perl

# $Id: processz3950queue,v 1.13.2.7 2006/10/09 13:48:22 hdl Exp $
use MARC::Record;
use C4::Context;
use DBI;
use strict;
use C4::Biblio;
use C4::Output;
use C4::Breeding;
use ZOOM;

=head1 NAME

processz3950queue. The script that does z3950 searches.

=head1 SYNOPSIS

This script can be used on a console (as normal user) or by the daemon-launch script.

Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.

It :
1- extracts z3950 requests from z3950queue table,
2- creates entries in z3950results to store the result
3- launch z3950 queries in asynchronous mode, using Unix fork()
4- store results in marc_breeding table.

The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).

=head1 DESCRIPTION

=head2 table z3950servers

This table stores the differents z3950 servers.
A server is used if checked=1. The rank is NOT used as searches are now asynchronous.

=head2 table z3950queue

Table use to manage queries. A single line is created  in this table for each z3950 search request.
If more than 1 server is called, the C<servers> field containt all of them separated by |.

z3950 search requests are done by z3950/search.pl script.
At this stage, the fields are created with C<startdate> and C<done> empty

Then, the processz3950queue finds this entry and :
1- store date (time()) in C<startdate>
2- set C<done> = -1

when the requests are all sent :
2- set C<done> = 1
3- set C<enddate> (FIXME: always equal to startdate for me)

entries are deleted when :
- C<startdate> is more than 1 day ago.

FIXME:
- results, numrecords fields are unused

=head2 table z3950results

1 entry is created for each request, for each server called.
when created :
* C<startdate> is filled
* C<enddate> is null
* active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.

When a search is ended, C<enddate> is set, and C<active> is set to -1

=head1 How it's written

on every loop :
* delete old queries
* for each entry in z3950queue table that is not done=1 {
	for each search request	{
		for each server {
			try to connect
			look for results
			*      results can be :
			- existing and already running on another process (active=1)
			- existing & finished (active=-1)
			- non existent => create it and run the request.
		}
	}
}
=over 2

=cut


if ($< == 0) {
    # Running as root, switch privs
    if (-d "/var/run") {
	open PID, ">/var/run/processz3950queue.pid";
	print PID $$."\n";
	close PID;
    }
    # Get real apacheuser from koha.conf or reparsing httpd.conf
    my $apacheuser=C4::Context->config("httpduser");
    my $uid=0;
    unless ($uid = (getpwnam($apacheuser))[2]) {
	die "Attempt to run daemon as non-existent or superuser\n";
    }
    $>=$uid;
    $<=$uid;
}
my $db_driver = C4::Context->config("db_scheme") || "mysql";
my $db_name   = C4::Context->config("database");
my $db_host   = C4::Context->config("hostname");
my $db_user   = C4::Context->config("user");
my $db_passwd = C4::Context->config("pass");
my $dbh = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
# print "DBI : $db_driver:$db_name:$db_host,$db_user, $db_passwd";

# we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
$sth->execute;
$sth->finish;
$SIG{CHLD}='reap';
$SIG{HUP}='checkqueue';


my $logdir=$ARGV[0];

open PID, ">$logdir/processz3950queue.pid";
print PID $$."\n";
close PID;

my $reapcounter=0;
my $forkcounter=0;
my $checkqueue=1;
my $pid=$$;
my $lastrun=0;
while (1) {
	if ((time-$lastrun)>5) {
		$checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
# clean DB
		my $now = time();
		# delete z3950queue entries that are more than 1 day old
		my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
		$sth->execute($now);
		# delete z3950results queries that are more than 1 hour old
		$sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
		$sth->execute($now);
		if ($checkqueue) { # everytime a SIG{HUP} is recieved
			$checkqueue=0;
# parse every entry in QUEUE
			$sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
			$sth->execute;
			while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
# FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
				if ($forkcounter<12) {
					my $now=time();
# search for results entries for this request
					my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
					($stk->execute($id)) || (next);
					my %serverdone;
# if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
					unless ($stk->rows) {
						my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
						$sti->execute($now,$id);
					}
# check which servers calls have already been created (before a crash)
					while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
						if ($r_enddate >0) { # result entry exist & finished
							$serverdone{$r_server}=1;
						} elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
							$serverdone{$r_server}=1;
						} else { # otherwise
							$serverdone{$r_server}=-1;
						}
						# note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
					}
					$stk->finish;
					foreach my $serverinfo (split(/\|/, $servers)) {
						(next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
						my $totalrecords=0;
						my $globalname;
						my $globalsyntax;
						my $globalencoding;
# fork a process for this z3950 query
						if (my $pid=fork()) {
							$forkcounter++;
						} else {
# and connect to z3950 server
#FIXME: why do we need $dbi ? can't we use $dbh ?
							my $db_driver = C4::Context->config("db_scheme") || "mysql";
							my $db_name   = C4::Context->config("database");
							my $db_host   = C4::Context->config("hostname");
							my $db_user   = C4::Context->config("user");
							my $db_passwd = C4::Context->config("pass");
							my $dbi = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
							$dbh->{"InactiveDestroy"} = "true";
							my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
							$globalname=$name;
							$globalsyntax = $syntax;
							$server=~/(.*)\:(\d+)/;
							my $servername=$1;
							my $port=$2;
							my $attr='';
							if ($type eq 'isbn') {
								$attr='1=7';
							} elsif ($type eq 'title') {
								$attr='1=4';
							} elsif ($type eq 'author') {
								$attr='1=1003';
							} elsif ($type eq 'lccn') {
								$attr='1=9';
							} elsif ($type eq 'keyword') {
								$attr='1=1016';
							}
							my $query = "\@attr $attr \"$term\"";
							print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
# try to connect
							my $conn;
							my $noconnection=0;
							my $error=0;
# the z3950 query is builded. Launch it.
							if ($user) {
								$conn= new ZOOM::Connection($servername, $port, databaseName => "$database", user => "$user", password => "$password");
                                $noconnection=1 if $@;
							} else {
								$conn= new ZOOM::Connection($servername, $port, databaseName => $database);
                                $noconnection=1 if $@;
							}
							if ($noconnection || $error) {
# if connection impossible, don't go further !
								print "$$/$id : no connection at $globalname ($servername, $port,$database,$user,$password)\n";
								my $result = MARC::Record->new();
								my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
							} else {
# else, build z3950 query and do it !
								$now=time();
								my $resultsid="";
	# create z3950results entries.
								if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
									my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
									$stj->execute($serverinfo,$id);
									($resultsid) = $stj->fetchrow;
									$stj->finish;
									print "$$/$id : 1 >> $resultsid\n";
								} else { # else create it : (may be serverdone=1 or 0)
									my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
									$stj->execute($serverinfo,$id);
									($resultsid) = $stj->fetchrow;
									$stj->finish;
									print "$$/$id : 2 >> $resultsid\n";
									unless ($resultsid) {
										$stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
										$stj->execute($serverinfo, $id, $now);
										$resultsid=$dbi->{'mysql_insertid'};
										$stj->finish;
										print "$$/$id : creating and ";
									}
								}
								print "$$/$id : working on results entry $resultsid\n";
	# set active to 1 => this request is on the way.
								my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
								$stj->execute($resultsid);
#######
								print "$$/$id : connected to $globalname\n";
								print "Global Syntax =>".$globalsyntax."<=";
 								eval {$conn->option(elementSetName => 'F')};
								eval { $conn->option(preferredRecordSyntax => 'USMARC');} if ($globalsyntax eq "USMARC" or $globalsyntax eq 'MARC21');
 								eval { $conn->option(preferredRecordSyntax => 'UNIMARC');} if ($globalsyntax eq "UNIMARC");
 								if ($@) {
 									print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
	# in case pb during connexion, set result to "empty" to avoid everlasting loops
									my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
									$stj->execute(0,0,$now,$resultsid);
 								} else {
									my $rs=$conn->search_pqf($query) or warn "Connection Problem:".$conn->errmsg();
 									pe();
	# we have an answer for a query => get results & store them in marc_breeding table
									my $numresults=$rs->size();
									if ($numresults eq 0) {
										print "$$/$id : $globalname : no records found\n";
									} else {
										print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
									}
 									pe();
									my $i;
									my $result='';
									my $scantimerstart=time();
									for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) {
										my $rec=$rs->record($i-1);
 										my $marcdata = $rec->raw();
                                        if (C4::Context->preference("MARCFLAVOUR") eq "UNIMARC"){
                                          warn "marcdata :".$marcdata;
                                          my $marcrecord = MARC::Record->new_from_usmarc($marcdata);
                                          use Encode;
                                          use Encode::Guess;
                                          my $targetcharset="utf8" if (C4::Context->preference("TemplateEncoding") eq "utf-8");
                                          $targetcharset="latin1" if (C4::Context->preference("TemplateEncoding") eq "iso-8859-1");
                                          my $decoder = guess_encoding($marcdata, qw/utf8 latin1/);
                                          die $decoder unless ref($decoder);
                                          warn "decodage : ".$decoder->name;
                                          warn "decodage cible : ".$targetcharset;
                                          my $newRecord=MARC::Record->new();
                                          foreach my $field ($marcrecord->fields()){
                                            if ($field->tag()<'010'){
                                              $newRecord->insert_grouped_field($field);
                                            } else {
                                              my $newField;
                                              my $createdfield=0;
                                              foreach my $subfield ($field->subfields()){
                                                if ($createdfield){
                                                  if (($newField->tag eq '100')) {
                                                    substr($subfield->[1],26,2,"0103") if ($targetcharset eq "latin1");
                                                    substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8");
                                                  }
                                                  map {char_decode($_,"UNIMARC");Encode::from_to($_,$decoder->name,$targetcharset);$_=~tr#\r##} @$subfield;
                                                  $newField->add_subfields($subfield->[0]=>$subfield->[1]);
                                                } else {
                                                  map {char_decode($_,"UNIMARC");Encode::from_to($_,$decoder->name,$targetcharset);$_=~tr#\r##} @$subfield;
                                                  $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]);
                                                  $createdfield=1;
                                                }
                                              }
                                              $newRecord->insert_grouped_field($newField);
                                            }
                                          }
#                                           $newRecord->encoding('UTF-8') if ($targetcharset eq 'utf8');
#                                           $newRecord->encoding('MARC-8') if ($targetcharset ne 'utf8');
                                          warn $newRecord->as_formatted(); 
                                          $globalencoding = $targetcharset;
                                          $result.=$newRecord->as_usmarc;
                                        } else {
                                          $globalencoding = ref($rec);
                                          $result.=$marcdata;
                                        }
									}
									my @x=split /::/,$globalencoding;
									my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"Z3950-$globalname",$x[3],$random);
									my $scantimerend=time();
									my $numrecords;
									($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80);
									my $elapsed=$scantimerend-$scantimerstart;
									if ($elapsed) {
										my $speed=int($numresults/$elapsed*100)/100;
										print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
									}
									my $q_result=$dbi->quote($result);
									($q_result) || ($q_result='""');
									$now=time();
									if ($numresults >0) {
										my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?");
										$stj->execute($numresults,$numrecords,$q_result,$now,$resultsid);
									} else { # no results...
										my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
										$stj->execute($numresults,$numrecords,$now,$resultsid);
									}
 								}
								$stj=$dbi->prepare("update z3950results set active=-1 where id=?");
								$stj->execute($resultsid);
								eval {my $stj->finish};
							}
#OK, the search is done inactivate it..
							print "$$/$id : $server search done.\n";
							exit;
						}
					}
				} else {
# $forkcounter >=12
				}
				# delete z3950queue entry, as everything is done
				my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
				$now=time();
				$sti->execute($now,$id);
			}
			$lastrun=time();
		}
		sleep 10;
	}
}

sub reap {
    $forkcounter--;
    wait;
}


sub checkqueue {
    $checkqueue=1;
}


sub pe {
	return 0;
}
