/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

#include "genezip/huffman_code.h"

void genezip_utils::huffman_code::generate_code(const std::vector<unsigned> &
						counts,
						unsigned tranche_one) {
  //generate the distribution of code lengths, then just call translate
  std::priority_queue<huffman_node,
		      std::vector<huffman_node>,
		      huffman_node_compare> generator_queue;
  //add the nodes
  std::vector<std::pair<unsigned, unsigned> > class_counts, 
    class_counts_pruned;
  for (unsigned i = 0; i < counts.size(); ++i) {
    class_counts.push_back(std::make_pair(i, 0));
    if (counts.at(i)) {
      huffman_node node;
      node.set_value(i);
      node.set_count(counts.at(i));
      generator_queue.push(node);
    }
  }
  //while the size is greater than one
  huffman_node val1, val2;
  if (generator_queue.size() == 1) {
    //hack away
    ++class_counts.at(generator_queue.top().get_value()).second;
  } else {
    while (generator_queue.size() > 1) {
      val1 = generator_queue.top();
      generator_queue.pop();
      val2 = generator_queue.top();
      generator_queue.pop();
      //increment the children
      if (val1.get_value() != GENEZIP_LENGTH_ERROR_UNSIGNED_CODE)
	++class_counts.at(val1.get_value()).second;
      if (val2.get_value() != GENEZIP_LENGTH_ERROR_UNSIGNED_CODE)
	++class_counts.at(val2.get_value()).second;
      for (std::vector<unsigned>::const_iterator iter = val1.first_child();
	   iter != val1.last_child(); ++iter) {
	if (*iter != GENEZIP_LENGTH_ERROR_UNSIGNED_CODE) ++class_counts.at(*iter).second;
      }
      for (std::vector<unsigned>::const_iterator iter = val2.first_child(); 
	   iter != val2.last_child(); ++iter) {
	if (*iter != GENEZIP_LENGTH_ERROR_UNSIGNED_CODE) ++class_counts.at(*iter).second;
      }
      huffman_node val3;
      val3.set_value(GENEZIP_LENGTH_ERROR_UNSIGNED_CODE);
      val3.set_count(val1.get_count() + val2.get_count());
      val3.set_children(val1.get_children(), val2.get_children());
      val3.add_child(val1.get_value());
      val3.add_child(val2.get_value());
      generator_queue.push(val3);
    }
  }
  //prune the class counts
  for (std::vector<std::pair<unsigned, unsigned> >::const_iterator iter = 
	 class_counts.begin(); iter != class_counts.end(); ++iter) {
    if (iter->second) class_counts_pruned.push_back(*iter);
  }
  if (class_counts_pruned.empty()) {
    _code_nonfunctional = true;
    return;
  }
  translate(class_counts_pruned, tranche_one, false);

  //confirm reciprocal behaviour: what we get from encoding returns the
  //same result when translated
  for (std::vector<std::pair<unsigned, unsigned> >::const_iterator iter = 
	 class_counts_pruned.begin(); 
       iter != class_counts_pruned.end(); ++iter) {
    unsigned encoded_value = 0, encoded_length = 0, should_be_original = 0;
    encode(iter->first, encoded_value, encoded_length);
    should_be_original = translate_code(encoded_value, encoded_length);
    if (iter->first != should_be_original)
      throw std::domain_error("ERROR: encoded " 
			      + to_string<unsigned>(iter->first) + " to "
			      + to_string<unsigned>(encoded_value) + "/" 
			      + to_string<unsigned>(encoded_length) 
			      + ", which translated to "
			      + to_string<unsigned>(should_be_original));
  }
}

// Look up the codeword assigned to a symbol.
//
//   input         - symbol to encode; must index into the encoder table
//   output        - receives the codeword bits
//   output_length - receives the codeword length in bits
//
// Throws std::domain_error if the coder is flagged nonfunctional or if
// input lies beyond the end of the encoder table.
void genezip_utils::huffman_code::encode(unsigned  input,
					 unsigned &output,
					 unsigned &output_length) const {
  if (_code_nonfunctional) {
    throw std::domain_error("genezip_utils::huffman_code::encode: "
			    "called on broken coder");
  }
  if (!(input < _encoder.size())) {
    throw std::domain_error("genezip_utils::huffman_code::encode: "
			    "input out of acceptable domain: \""
			    + to_string<unsigned>(input) + "\"");
  }
  //entry is (codeword, codeword length)
  const std::pair<unsigned, unsigned> &entry = _encoder.at(input);
  output = entry.first;
  output_length = entry.second;
}

// Assign codewords to symbols given each symbol's code length, and rebuild
// the decoder lookup table, the per-length code ranges and the encoder table.
//
//   class_counts - (symbol, code length) pairs; entries whose length is zero
//                  are given no codeword
//   tranche_one  - passed through unchanged to _lookup_table.populate()
//   report       - not used by this function
//
// Codewords are handed out in order of appearance within each length, with
// the first codeword of each length derived from the number of codes of the
// previous length.
//
// Throws std::domain_error if class_counts is empty.
void genezip_utils::huffman_code::translate(const std::vector<std::pair<unsigned, unsigned> > &class_counts, unsigned tranche_one, bool report) {
  if (class_counts.empty())
    throw std::domain_error("genezip_utils::huffman_code::translate: "
			    "called with empty counts variable");
  //get min and max code lengths
  _min_length = _max_length = class_counts.at(0).second;
  for (unsigned i = 1; i < class_counts.size(); ++i) {
    const unsigned length = class_counts.at(i).second;
    if (length < _min_length) _min_length = length;
    if (length > _max_length) _max_length = length;
  }
  //histogram: how many symbols use each code length
  std::vector<unsigned> counts_by_index(_max_length+1, 0);
  for (unsigned i = 0; i < class_counts.size(); ++i) {
    ++counts_by_index.at(class_counts.at(i).second);
  }
  //zero-length entries get no codeword, so they must not shift the codes
  counts_by_index.at(0) = 0;
  //available_codes.at(len) is the next unassigned codeword of that length
  unsigned code = 0;
  std::vector<unsigned> available_codes(_max_length+1, 0);
  for (unsigned i = 1; i <= _max_length; ++i) {
    code = (code + counts_by_index.at(i-1)) << 1;
    available_codes.at(i) = code;
  }
  //reset the length distribution: entry len holds (first, last) codeword of
  //that length, or the sentinel in both slots when no code has that length
  _length_distribution.assign(_max_length+1,
			      std::pair<unsigned, unsigned>
			      (GENEZIP_LENGTH_ERROR_UNSIGNED_CODE,
			       GENEZIP_LENGTH_ERROR_UNSIGNED_CODE));
  //assign the codes; each dictionary entry is (symbol, (codeword, length))
  std::vector<std::pair<unsigned, std::pair<unsigned, unsigned> > >
    huffman_dictionary;
  huffman_dictionary.reserve(class_counts.size());
  unsigned max_code = 0;
  for (unsigned i = 0; i < class_counts.size(); ++i) {
    const unsigned symbol = class_counts.at(i).first;
    const unsigned length = class_counts.at(i).second;
    if (!length) continue;
    if (max_code < symbol) max_code = symbol;
    const unsigned assigned = available_codes.at(length);
    huffman_dictionary.push_back(std::pair<unsigned,
				 std::pair<unsigned, unsigned> >
				 (symbol,
				  std::pair<unsigned, unsigned>(assigned,
								length)));
    //the first codeword seen for a length opens its range...
    if (_length_distribution.at(length).first ==
	GENEZIP_LENGTH_ERROR_UNSIGNED_CODE) {
      _length_distribution.at(length).first = assigned;
    }
    //...and every codeword extends the end of the range
    _length_distribution.at(length).second = assigned;
    ++available_codes.at(length);
  }
  _lookup_table.populate(huffman_dictionary, tranche_one, _max_length);

  //and allow encoding. assign() rather than resize(): resize() keeps the
  //old contents of surviving slots, which would leave codewords from a
  //previous call in place for symbols absent from this dictionary
  _encoder.assign(max_code + 1, std::pair<unsigned, unsigned>(0, 0));
  for (unsigned i = 0; i < huffman_dictionary.size(); ++i) {
    const unsigned symbol = huffman_dictionary.at(i).first;
    _encoder.at(symbol).first = huffman_dictionary.at(i).second.first;
    _encoder.at(symbol).second = huffman_dictionary.at(i).second.second;
  }
}

// Read one codeword from the buffer and return what translate_code() yields
// for it.
//
// Starts with the shortest code length in use, then pulls one extra bit at a
// time until the accumulated bits fall inside the stored codeword range for
// the current length.
//
// Throws std::domain_error if the coder is flagged nonfunctional, or if no
// length up to the maximum produces a match.
unsigned genezip_utils::huffman_code::pop_and_translate(binary_buffer &buffer)
  const {
  if (_code_nonfunctional) {
    throw std::domain_error("genezip_utils::huffman_code::pop_and_translate:"
			    " called on broken code");
  }
  //grab as many bits as the shortest codeword needs
  unsigned candidate = buffer.read(0, _min_length);
  const bool shortest_matches =
    candidate >= _length_distribution.at(_min_length).first &&
    candidate <= _length_distribution.at(_min_length).second;
  if (shortest_matches) {
    return translate_code(candidate, _min_length);
  }
  //otherwise lengthen the candidate bit by bit
  unsigned length = _min_length;
  while (length < _max_length) {
    ++length;
    candidate = (candidate << 1) | buffer.read(0,1);
    //lengths with no codewords are marked with the sentinel; skip them
    if (_length_distribution.at(length).first ==
	GENEZIP_LENGTH_ERROR_UNSIGNED_CODE) {
      continue;
    }
    if (candidate >= _length_distribution.at(length).first &&
	candidate <= _length_distribution.at(length).second) {
      return translate_code(candidate, length);
    }
  }
  throw std::domain_error("genezip_utils::huffman_code::pop_and_translate:"
			  " inconsistent bit sequence pulled for current"
			  " language");
}
