package ASD::Store;

#     This file is part of asd.
    
#     asd is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.

#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.

#     You should have received a copy of the GNU General Public License
#     along with this program; if not, write to the Free Software
#     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#     asd 0.2 Copyright 2004 Antonini Daniele <arpeda@gmail.com>

use strict;
use warnings;
use File::stat;

require Exporter;
require 5.005;

# This module hands out its functions through the standard Exporter.
our @ISA = ('Exporter');

# Nothing is exported by default; callers must ask for what they need.
our @EXPORT = ();

# Functions that may be imported on request, e.g.
#     use ASD::Store qw(store_without_occurrence);
our @EXPORT_OK = qw(
    store_without_occurrence
    store_without_occurrence_fast_search
);

# The ':all' tag imports every exportable function in one go:
#     use ASD::Store ':all';
our %EXPORT_TAGS = ( 'all' => [ @EXPORT_OK ] );

our $VERSION = '0.01';

# Store the index in the "fast search" layout.
#
# Arguments:
#   $ref_hash_man_page_title - hash ref handed to _write_man_page_title
#   $ref_inverted_list       - accepted for interface parity; not used yet
#   $ref_index_file          - hash ref of output paths; the keys read here
#                              are 'man_page_title', 'fast', 'lessico' and
#                              'document'
#
# Only the man page title file receives content for now. The 'fast',
# 'lessico' and 'document' files are created (or truncated) and left empty.
#
# Dies if one of those three files cannot be opened or closed.
# Returns 1 on success.
sub store_without_occurrence_fast_search {
    my $ref_hash_man_page_title = shift;
    my $ref_inverted_list = shift;    # not used yet
    my $ref_index_file = shift;

    _write_man_page_title( $ref_index_file->{'man_page_title'}, $ref_hash_man_page_title );

    # TODO: lexicographic ordering
    # foreach my $key ( sort {length($a)<=>length($b)} sort keys %h ) {
    #     print "$key," . $h{$key}."\n";
    # }

    # Create/truncate each output file, reporting failures instead of
    # silently carrying on with a missing index file.
    foreach my $kind ( 'fast', 'lessico', 'document' ) {
        my $path = $ref_index_file->{$kind};
        open my $fh, ">", $path
            or die "Cannot open '$path' for writing: $!";
        close $fh
            or die "Cannot close '$path': $!";
    }

    return 1;
}

# Write the man page title file, the lexicon file and the binary posting
# file for an inverted index.
#
# Arguments:
#   $ref_hash_man_page_title - hash ref handed to _write_man_page_title and
#                              to _structure_pointers
#   $ref_inverted_list       - hash ref: word => { docID => frequency }
#   $ref_index_file          - hash ref of output paths; the keys read here
#                              are 'man_page_title', 'lessico' and 'document'
#
# Layout of the 'document' file:
#   - the structure string returned by _structure_pointers, then
#   - for every word (in sorted order): the number of docIDs, packed with
#     the "length" format, followed by one (docID, frequency) pair per
#     docID in ascending numeric order. The frequency is stored as a single
#     unsigned byte, so values above 255 are written as 255.
#
# Layout of the 'lessico' file: one line per word, "<word> <offset>", where
# <offset> is the byte position of that word's record in the 'document' file.
#
# The caller's inverted list is not modified.
# Dies if an output file cannot be opened or closed. Returns 1 on success.
sub store_without_occurrence {
    my $ref_hash_man_page_title = shift;
    my $ref_inverted_list = shift;
    my $ref_index_file = shift;

    my ( $structure_string, $num_element_type, $docID_type, $freq_type ) =
        _structure_pointers( $ref_inverted_list, $ref_hash_man_page_title );

    _write_man_page_title( $ref_index_file->{'man_page_title'}, $ref_hash_man_page_title );

    my $lessico_path  = $ref_index_file->{'lessico'};
    my $document_path = $ref_index_file->{'document'};

    open my $lessico_fh, ">", $lessico_path
        or die "Cannot open '$lessico_path' for writing: $!";
    open my $document_fh, ">", $document_path
        or die "Cannot open '$document_path' for writing: $!";

    # The posting file holds packed binary data: without binmode a CRLF
    # platform would rewrite 0x0A bytes and invalidate the tell() offsets.
    binmode $document_fh;

    print {$document_fh} $structure_string;

    foreach my $word ( sort keys %$ref_inverted_list ) {
        my $ref_postings = $ref_inverted_list->{$word};

        # Record where this word's posting list starts.
        print {$lessico_fh} "$word " . tell($document_fh) . "\n";

        print {$document_fh} pack( $num_element_type, scalar( keys %$ref_postings ) );
        foreach my $docID ( sort { $a <=> $b } keys %$ref_postings ) {

            # Clamp on a copy so the caller's data keeps its real counts.
            my $frequency = $ref_postings->{$docID};
            $frequency = 255 if ( $frequency > 255 );

            print {$document_fh} pack( $docID_type . $freq_type, $docID, $frequency );
        }
    }

    # Buffered write errors only surface at close time, so check it.
    close $document_fh
        or die "Cannot close '$document_path': $!";
    close $lessico_fh
        or die "Cannot close '$lessico_path': $!";

    return 1;
}

# Write the man page title table to $file, one entry per line.
#
# Arguments:
#   $file                    - path of the file to create (or truncate)
#   $ref_man_page_hash_title - hash ref; each entry is written as
#                              "<value> <key>\n", with keys in sorted order
#
# Dies if the file cannot be opened or closed. Returns 1 on success.
sub _write_man_page_title {
    my $file = shift;
    my $ref_man_page_hash_title = shift;

    open my $fh, ">", $file
        or die "Cannot open '$file' for writing: $!";

    foreach my $word ( sort keys %$ref_man_page_hash_title ) {
        print {$fh} "$ref_man_page_hash_title->{$word} $word\n";
    }

    # Buffered write errors only surface at close time, so check it.
    close $fh
        or die "Cannot close '$file': $!";

    return 1;
}

# Pick the narrowest unsigned integer format able to hold $count.
# Returns a two-element list: the letter appended to the structure string
# and the matching Perl pack() template letter.
sub _unsigned_format_for {
    my $count = shift;

    return ( "B", "C" ) if ( $count <= 255 );      # unsigned byte
    return ( "H", "S" ) if ( $count <= 65535 );    # unsigned short
    return ( "I", "I" );                           # unsigned int
}

# Choose the binary field formats used for the posting file.
#
# Arguments:
#   $ref_inverted_list       - hash ref: word => hash ref keyed by docID
#   $ref_hash_man_page_title - hash ref; only its number of keys is used
#
# The "length" field is sized from the longest docID list found in the
# inverted list; the docID field is sized from the number of man page
# titles; the frequency field is always one unsigned byte.
#
# return an array
#  0) structure string (described by the original author as the
#     "string for python"): one letter each for length, docID, frequency
#  1) perl format for length
#  2) perl format for docID
#  3) perl format for frequency (always "C")
sub _structure_pointers {
    my $ref_inverted_list = shift;
    my $ref_hash_man_page_title = shift;

    # calculate max length of docID_list (order is irrelevant, so no sort)
    my $docID_length_list = 0;
    foreach my $word ( keys %$ref_inverted_list ) {
        my $length = scalar( keys %{ $ref_inverted_list->{$word} } );
        $docID_length_list = $length if ( $docID_length_list < $length );
    }

    my $title_count = scalar( keys %$ref_hash_man_page_title );

    my ( $num_element_letter, $num_element_type ) = _unsigned_format_for($docID_length_list);
    my ( $docID_letter,       $docID_type )       = _unsigned_format_for($title_count);

    # The trailing "B" describes the one-byte frequency field.
    my $structure_string = $num_element_letter . $docID_letter . "B";

    return ( $structure_string, $num_element_type, $docID_type, "C" );
}
