/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

/*!
  \file huffman_code.h
  \brief generate a new huffman code from data, or decode values from an 
  existing code

  The hash regime used in the huffman_code class is described in 
  http://www.gzip.org/algorithm.txt.  The description is at the above 
  link, but the implementation is entirely my own.
 */
#ifndef __GENEZIP__HUFFMAN_CODE_H__
#define __GENEZIP__HUFFMAN_CODE_H__

#include <vector>
#include <stdexcept>
#include <utility>
#include <iostream>
#include <fstream>
#include <string>
#include <queue>
#include "genezip/binary_buffer.h"
#include "genezip/helper_functions.h"
#include "genezip/multilevel_huffman_hash.h"
namespace genezip_utils {
  /*!
    \class huffman_node
    \brief a node in a binary tree used to generate a set of huffman code
    lengths
  */
  class huffman_node {
  public:
    /*!
      \brief constructor; the new node has value 0, count 0 and no children
    */
    huffman_node() : _value(0), _count(0) {}
    // Copy construction, copy assignment and destruction are deliberately
    // left to the compiler.  The implicit versions copy/destroy _value,
    // _count and _children member by member, which is all the hand-written
    // ones did.  Not declaring them also lets C++11-and-later compilers
    // generate move operations, so containers holding nodes can relocate
    // them without deep-copying the child vector.
    /*!
      \brief set the "value" (represented literal) of the node
      @param value new value for this node
    */
    void set_value(unsigned value) {_value = value;}
    /*!
      \brief get the "value" (represented literal) of the node
      \return current value for this node
    */
    unsigned get_value() const {return _value;}
    /*!
      \brief set the "count" (frequency of associated literal) of the node
      @param count new count for this node
    */
    void set_count(unsigned count) {_count = count;}
    /*!
      \brief get the "count" (frequency of associated literal) of the node
      \return current count for this node
    */
    unsigned get_count() const {return _count;}
    /*!
      \brief append a child to the end of this node's child list
      @param child "pointer" (unsigned handle) to new child
    */
    void add_child(unsigned child) {_children.push_back(child);}
    /*!
      \brief replace the children of this node
      @param children vector of new children
      \warning overwrites any existing child information for this node
    */
    void set_children(const std::vector<unsigned> &children) {
      _children = children;
    }
    /*!
      \brief replace the children of this node with the concatenation of
      two vectors (all of children1, followed by all of children2)
      @param children1 first vector of children
      @param children2 second vector of children
      \warning overwrites any existing child information for this node
    */
    void set_children(const std::vector<unsigned> &children1,
                      const std::vector<unsigned> &children2) {
      // Assemble the result in a temporary: either argument may refer to
      // this node's own child vector, and appending to a vector while
      // iterating over it is undefined behaviour.
      std::vector<unsigned> merged;
      merged.reserve(children1.size() + children2.size());
      merged.insert(merged.end(), children1.begin(), children1.end());
      merged.insert(merged.end(), children2.begin(), children2.end());
      _children.swap(merged);
    }
    /*!
      \brief get the current children at this node
      \return read-only reference to the child vector of this node
    */
    const std::vector<unsigned> &get_children() const {return _children;}
    /*!
      \brief get the current number of children at this node
      \return the current number of children at this node
    */
    unsigned nchildren() const {
      return static_cast<unsigned>(_children.size());
    }
    /*!
      \brief get iterator to first child at this node
      \return iterator to first child at this node
    */
    std::vector<unsigned>::const_iterator first_child() const {
      return _children.begin();
    }
    /*!
      \brief get iterator to end of child vector for this node
      \return past-the-end iterator of the child vector (not an iterator
      to the final child)
    */
    std::vector<unsigned>::const_iterator last_child() const {
      return _children.end();
    }
    /*!
      \brief report a memory figure for this node
      \return 2 * (number of children) * sizeof(unsigned)
      \note the fixed-size members are not included, and the reason for
      the factor of 2 is not visible in this header
    */
    unsigned bytes_of_compressed_memory() const {
      return static_cast<unsigned>(2 * _children.size() * sizeof(unsigned));
    }
  private:
    /*!
      \var _value
      \brief literal associated with this node
    */
    unsigned _value;
    /*!
      \var _count
      \brief frequency associated with this node
    */
    unsigned _count;
    /*!
      \var _children
      \brief children of current node
    */
    std::vector<unsigned> _children;
  };

  /*!
    \class huffman_node_compare
    \brief comparison functor for nodes, for use with std::priority_queue
    (orders by descending count so the lowest-count node is served first)
  */
  class huffman_node_compare {
  public:
    //! default constructor (kept user-provided so that const instances can
    //! be default-initialised on older compilers)
    huffman_node_compare() {}
    // The destructor is left implicit: the class holds no state.
    /*!
      \brief comparison operator wrapped by this class
      @param h1 first node to be compared
      @param h2 second node to be compared
      \return true when the count of h1 is strictly greater than the count
      of h2, false otherwise

      Const-qualified so the comparator can be invoked through a const
      object, which standard containers and algorithms are allowed to do.
    */
    bool operator() (const huffman_node &h1, const huffman_node &h2) const {
      //priority queue prioritizes greatest node, and requests < operator.
      //we want it to prioritize least node, hence the inverted comparison.
      return h1.get_count() > h2.get_count();
    }
  };
  /*!
    \class huffman_code
    \brief encode or decode a huffman code
  */
  class huffman_code {
  public:
    //! constructor: both length bounds start at 0 and the code is not
    //! flagged as nonfunctional
    huffman_code()
      : _min_length(0), _max_length(0), _code_nonfunctional(false) {}
    //! destructor
    ~huffman_code() throw() {}
    /*!
      \brief generate a huffman code from literal frequencies
      @param counts frequency of each literal; the index into the vector
      is the literal itself
      @param tranche_one_threshold number of bits to be encoded at the
      first level of the huffman table
    */
    void generate_code(const std::vector<unsigned> &counts,
                       unsigned tranche_one_threshold);
    /*!
      \brief turn (literal, bit length) pairs into a gzip-compliant
      huffman code
      @param class_counts pairs of (literal, bit_length)
      @param tranche_one_threshold number of bits to be encoded at the
      first level of the huffman table
      @param report verbosity flag (defaults to false)
    */
    void translate(
        const std::vector<std::pair<unsigned, unsigned> > &class_counts,
        unsigned tranche_one_threshold,
        bool report = false);
    /*!
      \brief encode a value with the huffman code, also reporting how
      many bits the encoded value occupies
      @param input uncoded value
      @param output receives the coded value
      @param output_length receives the number of bits in the coded value
    */
    void encode(unsigned input,
                unsigned &output,
                unsigned &output_length) const;
    //unsigned get_compatible_code();
    /*!
      \brief pull an appropriate number of bits from the binary input
      data and decode them with the stored huffman code
      @param buffer wrapper around input bit vector
      \return decoded value
    */
    unsigned pop_and_translate(binary_buffer &buffer) const;
    /*!
      \brief decode a single huffman code word via the lookup table
      @param huffcode encoded input datum
      @param length number of bits in input datum
      \return decoded value, as produced by the lookup table
      \throws std::domain_error if the code is flagged as nonfunctional
    */
    unsigned translate_code(unsigned huffcode, unsigned length) const {
      if (!_code_nonfunctional) {
        return _lookup_table.translate(huffcode, length);
      }
      throw std::domain_error(
          "huffman_code::translate_code: called on broken code");
    }
    /*!
      \brief report a memory figure for this object
      \return (2 + 2 * entries in the length distribution + 2 * entries
      in the encoder) * sizeof(unsigned), plus the figure reported by the
      lookup table
    */
    unsigned bytes_of_compressed_memory() const {
      // two scalar length fields, plus two unsigned values per stored pair
      unsigned own_bytes =
          (2 + 2 * (_length_distribution.size() + _encoder.size()))
          * sizeof(unsigned);
      return own_bytes + _lookup_table.bytes_of_compressed_memory();
    }
  private:
    //! minimum huffman code length
    unsigned _min_length;
    //! maximum huffman code length
    unsigned _max_length;
    //! calculated distribution of inputs
    std::vector<std::pair<unsigned, unsigned> > _length_distribution;
    //! hash table lookup (decoder) of this code
    multilevel_huffman_hash _lookup_table;
    //! correspondence table, from input to huffman code
    std::vector<std::pair<unsigned, unsigned> > _encoder;
    //! true while the code is not completely generated
    bool _code_nonfunctional;
  };
}
#endif //__GENEZIP__HUFFMAN_CODE_H__
