/*
Copyright 2013 Cameron Palmer

This file is a part of Genezip.

Genezip is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Genezip is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Genezip.  If not, see <http://www.gnu.org/licenses/>
*/

#include "genezip/suffix_tree.h"

// Walk the whole tree depth first, handing every subtree hanging off the
// root to the per-node dfs overload, then scan the collected queue for
// nodes that were reported more than once.
//
// target: queue the per-node overload appends visited nodes to; may be NULL,
//         in which case the duplicate scan is skipped.
// delete_old, delete_old_threshold:
//         NOTE(review): neither parameter is read in this overload, and the
//         per-node overload is called with only three arguments, so it runs
//         with whatever defaults its declaration supplies -- confirm whether
//         forwarding was intended.
void genezip_utils::suffix_tree::dfs(std::queue
				     <genezip_utils::suffix_tree_node *> 
				     *target,
				     bool delete_old,
				     unsigned delete_old_threshold) {
  if (_root) {
    // the root itself is never reported; only its subtrees are walked
    _root->iterate_initialize();
    for (suffix_tree_node *subtree = _root->iterate_children();
	 subtree;
	 subtree = _root->iterate_children()) {
      dfs(subtree, target, 2);
    }
  }
  if (!target) return;
  // sanity pass: work on a copy so the caller's queue is left untouched
  std::queue<genezip_utils::suffix_tree_node *> pending(*target);
  std::map<suffix_tree_node *, bool> seen;
  for ( ; !pending.empty(); pending.pop()) {
    suffix_tree_node *node = pending.front();
    if (seen.count(node)) {
      std::cerr << "error! duplicate node! " << node
		<< std::endl;
    }
    seen[node] = true;
  }
}

// Depth-first walk of the subtree rooted at `root`.
//
// root:   node to visit; a NULL root makes the call a no-op.
// target: when non-NULL, receives `root`, then (through the recursive
//         calls) its descendants, then root's end node if it has one.
// level:  depth of `root`; only passed on (incremented) to the recursive
//         call, it does not influence the traversal.
// delete_old / delete_old_threshold:
//         when delete_old is set, a child whose start position is below
//         the threshold is removed with delete_node() and recycled onto
//         _available instead of being descended into.
//
// Throws std::domain_error when a requested deletion reports failure.
//
// NOTE(review): the recursive call passes only three arguments, so
// delete_old / delete_old_threshold are not forwarded to deeper levels;
// they take whatever defaults the declaration supplies.
void genezip_utils::suffix_tree::dfs(genezip_utils::suffix_tree_node *root,
				     std::queue
				     <genezip_utils::suffix_tree_node *> 
				     *target,
				     unsigned level,
				     bool delete_old,
				     unsigned delete_old_threshold) {
  suffix_tree_node *ptr = NULL, *deleted = NULL;
  // NOTE(review): latest_child is never used in this function.
  suffix_tree_node *latest_child = NULL;
  //in this search, the tree is not guaranteed to be in a consistent state.
  //have to update the root match length to reflect later updates
  if (root) {
    if (target) target->push(root);
    //std::cout << "pushing " << root << " at level " << level << std::endl;
    root->iterate_initialize();
    while ((ptr = root->iterate_children())) {
      if (delete_old) {
	if (ptr->get_start() < delete_old_threshold) {
	  // stale child: unlink it, recycle the node, and do not descend
	  if ((deleted = delete_node(ptr))) {
	    _available.push(deleted);
	    continue;
	  } else throw std::domain_error("dfs-clean: unknown deletion error");
	}
      }
      dfs(ptr, target, level+1);
    }
    //and check the end flag just in case
    if (root->end()) {
      // NOTE(review): this threshold test is not guarded by delete_old.
      if (root->end()->get_start() < delete_old_threshold) {
	// NOTE(review): the loop above only exits once ptr is NULL, and
	// delete_node() returns its argument unchanged, so `deleted` is
	// always NULL here and this branch always throws. root->end() was
	// probably the intended argument -- confirm before changing, since
	// delete_node() unlinks via remove_child() rather than the end slot.
	if ((deleted = delete_node(ptr))) {
	  _available.push(deleted);
	  return;
	} else throw std::domain_error("dfs-clean (end): unknown deletion "
				       "error");
      }
      if (target) target->push(root->end());
    }
  }
}

bool genezip_utils::child_vector::find_child(unsigned code,
					     suffix_tree_node *&ptr) const {
  ptr = _data.at(code) ? _data.at(code) : NULL;
  return ptr;
}

bool genezip_utils::child_vector::add_child(unsigned code,
					    suffix_tree_node *ptr) {
  bool retval = !_data.at(code) && ptr;
  if (retval)
    _data.at(code) = ptr;
  //if (d->at(ptr->get_start()) != code) throw std::domain_error("fail1");
  return retval;
}

bool genezip_utils::child_vector::force_add_child(unsigned code,
						  suffix_tree_node *ptr) {
  bool retval = !_data.at(code) && ptr;
  _data.at(code) = ptr;
  //if (d->at(ptr->get_start()) != code) throw std::domain_error("fail2");
  return retval;
}

bool genezip_utils::child_vector::remove_child(unsigned code) {
  if (_data.at(code)) {
    _data.at(code) = 0;
    return true;
  } else return false;
}

void genezip_utils::child_vector::iterate_initialize() {
  _data_current_access = _data.begin();
}

// Return the next occupied slot at or after the cursor and leave the cursor
// just past it; empty slots are skipped. Returns NULL once the cursor has
// reached the end of the vector.
genezip_utils::suffix_tree_node *genezip_utils::child_vector::iterate_children() {
  for ( ; _data_current_access != _data.end(); ++_data_current_access) {
    if (*_data_current_access) {
      suffix_tree_node *found = *_data_current_access;
      ++_data_current_access;
      return found;
    }
  }
  return NULL;
}

// Verify that every occupied slot is filed under the symbol found in `d` at
// that child's start position.
//
// d: buffer the children's start positions index into.
// Throws std::domain_error on the first slot whose index disagrees.
void genezip_utils::child_vector::consistency_check(uncompressed_buffer *d) {
  for (unsigned slot = 0; slot < _data.size(); ++slot) {
    if (!_data.at(slot))
      continue;
    if (d->at(_data.at(slot)->get_start()) != slot)
      throw std::domain_error("failed here in vector");
  }
}

// Verify that every non-NULL entry is keyed by the symbol found in `d` at
// that child's start position.
//
// d: buffer the children's start positions index into.
// Throws std::domain_error on the first entry whose key disagrees.
void genezip_utils::child_map::consistency_check(uncompressed_buffer *d) {
  std::map<unsigned, suffix_tree_node *>::const_iterator entry = _data.begin();
  while (entry != _data.end()) {
    suffix_tree_node *occupant = entry->second;
    if (occupant &&
	d->at(occupant->get_start()) != entry->first)
      throw std::domain_error("failed here in map");
    ++entry;
  }
}

bool genezip_utils::child_map::find_child(unsigned code, suffix_tree_node *&ptr) const {
  container_type::const_iterator finder = _data.find(code);
  ptr = (finder == _data.end()) ? NULL : finder->second;
  return ptr;
}

bool genezip_utils::child_map::add_child(unsigned code, suffix_tree_node *ptr) {
  bool retval = _data.find(code) == _data.end() && ptr;
  if (retval)
    _data[code] = ptr;
  //if (d->at(ptr->get_start()) != code) throw std::domain_error("fail3");
  return retval;
}

bool genezip_utils::child_map::force_add_child(unsigned code, suffix_tree_node *ptr) {
  bool retval = _data.find(code) == _data.end() && ptr;
  _data[code] = ptr;
  //if (d->at(ptr->get_start()) != code) throw std::domain_error("fail4");
  return retval;
}

bool genezip_utils::child_map::remove_child(unsigned code) {
  return (_data.erase(code));
}

void genezip_utils::child_map::iterate_initialize() {
  _data_current_access = _data.begin();
}

// Return the next non-NULL child at or after the cursor and leave the cursor
// just past it; entries mapped to NULL are skipped. Returns NULL once the
// cursor has reached the end of the map.
genezip_utils::suffix_tree_node *genezip_utils::child_map::iterate_children() {
  for ( ; _data_current_access != _data.end(); ++_data_current_access) {
    if (_data_current_access->second) {
      suffix_tree_node *found = _data_current_access->second;
      ++_data_current_access;
      return found;
    }
  }
  return NULL;
}

// Release the root (allocated on its own with `new`) and every bulk block
// recorded in _deleteable (each allocated with `new[]`).
genezip_utils::suffix_tree::~suffix_tree() throw() {
  // delete / delete[] on a NULL pointer is a no-op, so no guards are needed
  delete _root;
  typedef std::vector<suffix_tree_node *>::iterator block_iterator;
  for (block_iterator block = _deleteable.begin();
       block != _deleteable.end(); ++block) {
    delete [] *block;
  }
}

void genezip_utils::suffix_tree::allocate(unsigned number_to_allocate) {
  bool root_is_empty = !_root;
  if (root_is_empty) {
    //first allocate the root
    _root = new suffix_tree_node;
    _root->enable_vector_handler(_alphabet_size + 1);
    _root->set_language_size(_alphabet_size);
    _root->is_end(true);
    _root->root_default();
  }
  if (number_to_allocate) {
    //now allocate everything else
    //allocate in bulk
    suffix_tree_node *ptr = new suffix_tree_node[number_to_allocate];
    _deleteable.push_back(ptr);
    for (unsigned i = 0; i < number_to_allocate; ++i) {
      ptr[i].enable_map_handler();
      ptr[i].set_language_size(_alphabet_size);
      _available.push(ptr + i);
    }
  }
}

void genezip_utils::suffix_tree::add_node_to_root(unsigned code,
						  unsigned index) {
  if (_root) {
    if (_available.empty()) allocate(GENEZIP_SIMULTANEOUS_ALLOC_SIZE);
    add_node_to_root(next_available_node(), code, index);
  } else {
    throw std::domain_error("add_node_to_root: root DNE");
  }
}

// Pop the next node from the free pool, growing the pool when it is empty.
//
// A pooled node may still reference children and an end node from its
// previous use; those are pushed back onto the pool and the node is
// returned with no children, no end node and its end flag cleared.
//
// Throws std::domain_error when the pool is still empty after allocate().
genezip_utils::suffix_tree_node *genezip_utils::suffix_tree::next_available_node() {
  if (_available.empty()) {
    allocate(GENEZIP_SIMULTANEOUS_ALLOC_SIZE);
    if (_available.empty())
      throw std::domain_error("genezip_utils::suffix_tree::next_available_node: out of memory");
  }
  suffix_tree_node *recycled = _available.front();
  _available.pop();
  //hand any leftover children back to the pool
  recycled->iterate_initialize();
  for (suffix_tree_node *orphan = recycled->iterate_children();
       orphan;
       orphan = recycled->iterate_children()) {
    _available.push(orphan);
  }
  recycled->clear_children();
  //same for a leftover end node
  suffix_tree_node *stale_end = recycled->end();
  if (stale_end) {
    _available.push(stale_end);
    recycled->clear_end();
  }
  recycled->is_end(false);
  return recycled;
}

// Remove the oldest tracked leaf (front of _leaves) from the tree.
//
// Returns the detached node so the caller can reuse it, or NULL when there
// is nothing to delete. add_and_delete() relies on that NULL to report an
// empty tree, so the empty queue is checked explicitly here: calling
// front() on an empty std::queue is undefined behaviour.
//
// As before, a NULL entry at the front of the queue is left in place and
// reported as NULL.
genezip_utils::suffix_tree_node *genezip_utils::suffix_tree::delete_leaf() {
  if (_leaves.empty())
    return NULL;
  suffix_tree_node *oldest = _leaves.front();
  if (!oldest)
    return NULL;
  _leaves.pop();
  return delete_node(oldest);
}

// Unlink `leaf` from its parent and strip it for reuse.
//
// Steps:
//  1. the parent drops the child filed under the symbol at leaf's start;
//  2. if the parent is then left with fewer than two children, it is merged
//     away: its one remaining child (or, failing that, its end node) takes
//     over the parent's start position and grandparent, and is registered
//     with the grandparent under the symbol at its new start;
//  3. if there is no grandparent the parent must be the root; the root then
//     takes over the survivor's start and becomes an end node again;
//  4. clearout() recycles leaf's own descendants (leaf itself is NOT pushed
//     onto _available -- the caller decides what to do with it).
//
// Returns `leaf` (NULL in, NULL out).
// Throws std::domain_error when leaf has no parent, when the parent's child
// count is inconsistent, or when the root-collapse expectations fail.
//
// Fix: the root-collapse check compared against _leaves.front() without
// checking that _leaves is non-empty, which is undefined behaviour on an
// empty queue. An empty queue now fails the same "expected child" check.
// The message texts (which say delete_leaf) are kept unchanged.
genezip_utils::suffix_tree_node *genezip_utils::suffix_tree::delete_node
(suffix_tree_node *leaf) {
  if (!leaf)
    return leaf;
  suffix_tree_node *parent = leaf->get_parent();
  if (!parent)
    throw std::domain_error("suffix_tree::delete_node: parent pointer "
			    "is NULL");
  parent->remove_child(at_in_data(leaf->get_start()));
  if (parent->number_children() < 2) {
    //merge the parent out of existence
    parent->iterate_initialize();
    suffix_tree_node *last_child = parent->iterate_children();
    if (!last_child) last_child = parent->end();
    if (!last_child)
      throw std::domain_error("suffix_tree::delete_leaf: "
			      "inconsistency in child count");
    //this last child needs to take over for the parent
    last_child->set_start_and_parent(parent->get_start(),
				     parent->get_parent());
    if (parent->get_parent()) {
      parent->get_parent()->force_add_child
	(at_in_data(last_child->get_start()),
	 last_child);
    } else {
      //NULL grandparent means parent is root
      if (parent != _root)
	throw std::domain_error("suffix_tree::delete_leaf: "
				"expected root, found NULL");
      if (_leaves.empty() || last_child != _leaves.front())
	throw std::domain_error("suffix_tree::delete_leaf: "
				"expected child, found ?");
      _root->set_start_and_parent(last_child->get_start(), NULL);
      _root->is_end(true);
    }
  }
  clearout(leaf, false);
  return leaf;
}

// Recursively strip `ptr` of its descendants and return them to the free
// pool.
//
// ptr:      node to strip; NULL is ignored.
// autopush: when true, `ptr` itself is also pushed onto _available after
//           being stripped. The recursive calls always pass true, so every
//           descendant is recycled.
//
// On return ptr has no children, no end node, and its end flag is cleared.
void genezip_utils::suffix_tree::clearout(suffix_tree_node *ptr,
					  bool autopush) {
  if (!ptr)
    return;
  //recycle whole subtrees right away so memory is reclaimed faster
  ptr->iterate_initialize();
  for (suffix_tree_node *descendant = ptr->iterate_children();
       descendant;
       descendant = ptr->iterate_children()) {
    clearout(descendant, true);
  }
  ptr->clear_children();
  suffix_tree_node *stale_end = ptr->end();
  if (stale_end) {
    _available.push(stale_end);
    ptr->clear_end();
  }
  ptr->is_end(false);
  if (autopush)
    _available.push(ptr);
}

// Attach the caller-supplied node `ptr` below the root as the node for
// symbol `value` found at data position `index`, then record it in _leaves.
//
// ptr:   node to insert; must be non-NULL. Its children are cleared and any
//        end node it still holds is returned to _available first.
// value: symbol code the node is filed under at the root.
// index: start position assigned to the node.
//
// Two cases:
//  - the root is flagged as an end: the root stops being an end, gets a
//    fresh end node (start 1, parent root) from next_available_node(), and
//    `ptr` is registered directly as the root's child for `value`;
//  - otherwise `ptr` is merged with whatever child the root already holds
//    for `value` via safely_merge_nodes().
//
// Throws std::domain_error when ptr is NULL, when the tree has no root, or
// when one of the internal sanity checks on the data fails.
// NOTE: the preparatory clearing of `ptr` happens before the root check, so
// it has already taken place when the "root is empty" error is thrown.
void genezip_utils::suffix_tree::add_node_to_root(suffix_tree_node *ptr,
						  unsigned          value,
						  unsigned          index) {
  //  root_check_consistency();
  if (!ptr)
    throw std::domain_error("suffix_tree::add_node_to_root: "
			    "provided node is empty");
  // strip leftovers from the node's previous use
  ptr->clear_children();
  if (ptr->end()) {
    _available.push(ptr->end());
    ptr->clear_end();
  }
  if (_root) {
    //ptr->set_bounds(index, 1);
    if (_root->is_end()) {
      // root was flagged as an end: give it a dedicated end node and hang
      // ptr directly off the root
      _root->is_end(false);
      _root->end(next_available_node());
      _root->end()->is_end(true);
      _root->end()->set_start_and_parent(1, _root);
      //_root->end()->set_start(1);
      //_root->end()->set_parent(_root);
      _root->force_add_child(value, ptr);
      ptr->set_start_and_parent(index, _root);
      //ptr->set_parent(_root);
      //ptr->is_end(true);
    } else {
      //ptr->first_value(value);
      ptr->set_start_only(index);
      // sanity check: the data at `index` must actually be `value`
      if (at_in_data(ptr->get_start()) != 
	  value)
	throw std::domain_error("add_node_to_root::1: logic error (" + to_string<unsigned>(index) + ","
				+ to_string<unsigned>(value) + "," + to_string<unsigned>(at_in_data(ptr->get_start())) + ")");
      suffix_tree_node *existing_child = NULL;
      _root->find_child(value, existing_child);
      // sanity check: a child already filed under `value` must start with
      // the same symbol as the new node
      if (existing_child &&
	  at_in_data(ptr->get_start()) !=
	  at_in_data(existing_child->get_start())) {
	throw std::domain_error("add_node_to_root::2: logic error");
      }
      //std::cout << "check passed at " << ptr->get_start() << "=" << _raw_data->at(ptr->get_start())
      //	<< "; ";
      //if (!existing_child)
      //std::cout << "NULL=NULL" << std::endl;
      //else
      //std::cout << existing_child->get_start() << "=" << _raw_data->at(existing_child->get_start()) << std::endl;
      //suffix_tree_node *existing_child = _root->get_child(value);
      // existing_child may be NULL here; safely_merge_nodes handles both
      safely_merge_nodes(_root, existing_child, ptr);
    }
  } else throw std::domain_error("suffix_tree::add_node_to_root: "
				 "root is empty");
  // track the node so delete_leaf() can retire it later, oldest first
  _leaves.push(ptr);
}

void genezip_utils::suffix_tree::add_and_delete(unsigned value,
						unsigned index) {
  //  root_check_consistency();
  suffix_tree_node *ptr = delete_leaf();
  //root_check_consistency();
  if (ptr) {
    add_node_to_root(ptr, value, index);
    //root_check_consistency();
  } else {
    throw std::domain_error("suffix_tree::add_and_delete: "
			    "nothing to delete from tree....");
  }
}

// Move `end` out of parent's end slot so that it no longer shadows `child`.
//
// parent: node whose end slot is being vacated; must be non-NULL.
// child:  parent's existing child sharing end's first symbol, or NULL.
// end:    the node currently acting as parent's end, or NULL.
//
// Behaviour:
//  - end == NULL: nothing to repair, returns immediately;
//  - child == NULL: `end` simply becomes a regular child of `parent`;
//  - both present: a new internal node is taken from the pool, given
//    child's original start and `parent` as parent, and both `child` and
//    `end` are moved below it with their starts advanced past the symbols
//    they have in common.
//
// Throws std::domain_error when parent is NULL.
//
// Fixes relative to the previous version:
//  - with child == NULL and end == NULL the code dereferenced `end`; the
//    trailing comment documented that case as "nothing changes", which is
//    what now happens;
//  - the common-prefix loop compared the same two positions on every pass
//    and never advanced, so it could not terminate once the first symbols
//    matched. It now compares position by position and stops at the end of
//    the data.
//
// NOTE(review): the new internal node is never registered with `parent`
// through force_add_child(), so parent's child slot still refers to `child`.
// The loop is also not limited by child's own length. Both are left as they
// were; confirm the intended behaviour before relying on this function.
void genezip_utils::suffix_tree::safely_repair_tree_structure(suffix_tree_node *parent,
							      suffix_tree_node *child,
							      suffix_tree_node *end) {
  if (!parent)
    throw std::domain_error("suffix_tree::safely_repair_tree_structure: "
			    "supplied parent node is NULL");
  if (!end)
    return; //end is NULL, and nothing changes
  if (!child) {
    end->is_end(false);
    end->set_and_update_parent(parent);
    parent->force_add_child(at_in_data(end->get_start()),
			    end);
    parent->end(NULL);
    return;
  }
  parent->end(NULL);
  //count how many leading symbols child and end have in common
  const unsigned data_size = size_of_data();
  unsigned amt_to_remove = 0;
  while (child->get_start() + amt_to_remove < data_size &&
	 end->get_start() + amt_to_remove < data_size &&
	 at_in_data(child->get_start() + amt_to_remove) ==
	 at_in_data(end->get_start() + amt_to_remove)) {
    ++amt_to_remove;
  }
  suffix_tree_node *ptr = next_available_node();

  ptr->set_start_and_parent(child->get_start(), parent);
  ptr->add_child(at_in_data(child->get_start()), child);
  ptr->end(end);
  child->set_start_and_parent(child->get_start() + amt_to_remove, ptr);
  end->set_start_and_parent(end->get_start() + amt_to_remove, ptr);
}

// Push the end node of `child_possibly_exists` down into the regular child
// structure, descending as long as an existing child fully matches it.
//
// child_possibly_exists: node whose end slot is processed; NULL makes the
//                        call a no-op.
// search_bound:          exclusive upper limit on data positions read on the
//                        end node's side of the comparison (and on the
//                        child's side when the child reports length 0).
//
// Each pass of the loop takes the current parent's end node (end_extend),
// removes it from the end slot, and looks for a regular child filed under
// the symbol at its start:
//  - no such child: the end node becomes that child; done.
//  - the child matches over its whole (non-zero) length: the end node's
//    start advances past the match, the walk descends into that child, and
//    the end node is carried along (end_to_extend) to be re-attached lower.
//  - the match is partial (but non-zero): a branch node is spliced in above
//    the child, with the end node as the branch's end; done.
//  - the match is empty: diagnostics are printed and an exception is thrown.
// After the loop, a still-carried end node becomes the end of the node the
// walk stopped at.
//
// Throws std::domain_error on any of the internal consistency failures.
void genezip_utils::suffix_tree::handle_end_at_site(suffix_tree_node *child_possibly_exists,
						    unsigned search_bound) {
  if (!child_possibly_exists) return;
  // end_extend: end node being relocated on this pass
  suffix_tree_node *end_extend = child_possibly_exists->end();
  // chi_extend: existing child filed under end_extend's first symbol
  suffix_tree_node *chi_extend = NULL;
  // par_extend: node currently being worked on
  suffix_tree_node *par_extend = child_possibly_exists;
  // end_to_extend: end node carried down from the previous level, if any
  suffix_tree_node *end_to_extend = NULL;
  //  child_possibly_exists->clear_end();
  while (end_extend) {
    //std::cout << "endextend on " << _raw_data->at(end_extend->get_start()) << " with parent "
    //	      << _raw_data->at(par_extend->get_start()) << std::endl;
    
    // presumably find_child overwrites chi_extend on every call, as the
    // child_vector/child_map implementations do -- chi_extend is not reset
    // between passes
    par_extend->find_child(at_in_data(end_extend->get_start()),
			   chi_extend);
    par_extend->clear_end();
    //update the parental start site
    //par_extend->set_start_only(end_extend->get_start() - 1);
    end_extend->is_end(false);
    //    root_check_consistency();
    //test if there's already a child in the applicable slot
    if (chi_extend) {
      if (at_in_data(end_extend->get_start()) !=
	  at_in_data(chi_extend->get_start())) {
	throw std::domain_error("found child mismatch: expected " +
				to_string<unsigned>(at_in_data(end_extend->get_start())) +
				", found " + to_string<unsigned>(at_in_data(chi_extend->get_start())));
      }
      //std::cout << "chiextend (raw data size is " << _raw_data->size() << ")" << std::endl;
      //see how much overlap there is
      // a child length of 0 means "no stored length": fall back to
      // search_bound as the limit on the child's side
      unsigned max_compare_bound = (!chi_extend->get_length()) ? search_bound : chi_extend->get_start() + chi_extend->get_length();
      unsigned end_max_compare_bound = search_bound;
      if (max_compare_bound > size_of_data()) max_compare_bound = size_of_data();
      unsigned match_length = 0;
      //std::cout << "match compare bound is " << max_compare_bound << std::endl;
      //std::cout << "end extend start is " << end_extend->get_start() << std::endl;
      // count matching symbols, stopping at either bound or first mismatch
      for ( ; chi_extend->get_start() + match_length < max_compare_bound &&
	      end_extend->get_start() + match_length < end_max_compare_bound; ++match_length) {
	if (at_in_data(end_extend->get_start() + match_length) != 
	    at_in_data(chi_extend->get_start() + match_length)) {
	  //std::cout << "match failed: " << _raw_data->at(end_extend->get_start() + match_length)
	  //<< "/" << _raw_data->at(chi_extend->get_start() + match_length) << std::endl;
	  break;
	} else {
	  //std::cout << "found a match: " << _raw_data->at(chi_extend->get_start() + match_length) << std::endl;
	}
      }
      //std::cout << "found a match of length " << match_length << std::endl;
      if (match_length == chi_extend->get_length() &&
	  chi_extend->get_length()) {
	// full match of the child: re-attach any end node carried from the
	// level above, then descend into the child carrying this one
	if (end_to_extend) {
	  par_extend->end(end_to_extend);
	  end_to_extend->set_and_update_parent(par_extend);
	}
	
	end_extend->set_start_only(end_extend->get_start() + match_length);
	par_extend = chi_extend;
	end_to_extend = end_extend;
	end_extend = par_extend->end();
	//	root_check_consistency();
      } else if (!match_length) {
	// the first symbols were verified equal above, so an empty match
	// can only mean the bounds excluded position 0: dump state and abort
	std::cout << "match_length=" << match_length << "; chi_extend_start=" << chi_extend->get_start()
		  << "; end_extend_start=" << end_extend->get_start() << "; max_compare_bound="
		  << max_compare_bound << "; end_max_compare_bound=" << end_max_compare_bound
		  << "; val_at_end_start=" << at_in_data(end_extend->get_start())
		  << "; val_at_chi_start=" << at_in_data(chi_extend->get_start()) << std::endl;
	throw std::domain_error("impossible condition alpha");
      } else {
	//insert branch
	// brc_extend takes over the shared prefix; the child and the end
	// node both move below it with their starts advanced past the match
	suffix_tree_node *brc_extend = next_available_node();
	brc_extend->set_start_and_parent(end_extend->get_start(), par_extend);
	brc_extend->end(end_extend);
	par_extend->force_add_child(at_in_data(brc_extend->get_start()),
				    brc_extend);
	chi_extend->set_start_and_parent(chi_extend->get_start() + match_length, brc_extend);
	end_extend->set_start_and_parent(end_extend->get_start() + match_length,
					 brc_extend);
	end_extend->is_end(true);
	
	brc_extend->force_add_child(at_in_data(chi_extend->get_start()),
				    chi_extend);
	if (end_to_extend) {
	  par_extend->end(end_to_extend);
	  end_to_extend->set_and_update_parent(par_extend);
	  end_to_extend = NULL;
	}
	//	root_check_consistency();
	break;
      }
    } else {
      //std::cout << "nochiextend" << std::endl;
      // slot is free: the end node simply becomes a regular child
      par_extend->force_add_child(at_in_data(end_extend->get_start()),
				  end_extend);
      end_extend->set_start_and_parent(end_extend->get_start(), par_extend);
      //      root_check_consistency();
      break;
    }
    //safely_merge_nodes(child_possibly_exists, chi_extend, end_extend);
  }
  // an end node still being carried lands in the end slot of the node the
  // walk stopped at, which must be vacant
  if (end_to_extend) {
    if (par_extend->end())
      throw std::domain_error("impossible end condition!");
    par_extend->end(end_to_extend);
    end_to_extend->set_and_update_parent(par_extend);
  }
}

// Insert `insertion` below `parent`, reconciling it with the child that
// `parent` may already hold for the same first symbol.
//
// parent:                node receiving the insertion; must be non-NULL.
// child_possibly_exists: parent's existing child for insertion's first
//                        symbol, or NULL when the slot is free.
// insertion:             node being inserted; must be non-NULL.
//
// Cases:
//  - no existing child: `insertion` becomes parent's child directly;
//  - existing child of length 1: the child's own end node is first pushed
//    down (handle_end_at_site), the child is re-anchored at insertion's
//    start, and `insertion` (start advanced by one) becomes the child's end;
//  - existing child of any other length: a new internal node covering the
//    single shared symbol is spliced in under `parent`, with `insertion` as
//    its end and the existing child (start advanced by one) as its child.
//
// Throws std::domain_error when parent or insertion is NULL, or when the
// first symbols of insertion and the existing child disagree.
//
// Fix: a NULL `insertion` used to fall into the "no existing child" branch
// and be dereferenced; it is now rejected up front. The unused locals seq1
// and seq2 were dropped.
void genezip_utils::suffix_tree::safely_merge_nodes(suffix_tree_node *parent,
						    suffix_tree_node *child_possibly_exists,
						    suffix_tree_node *insertion) {
  if (!parent)
    throw std::domain_error("suffix_tree::safely_merge_nodes: "
			    "supplied parent node is NULL");
  if (!insertion)
    throw std::domain_error("suffix_tree::safely_merge_nodes: "
			    "supplied insertion node is NULL");

  if (!child_possibly_exists) {
    //free slot: the insertion is simply a new child
    parent->force_add_child(at_in_data(insertion->get_start()),
			    insertion);
    insertion->set_start_and_parent(insertion->get_start(),
				    parent);
    return;
  }

  if (child_possibly_exists->get_length() == 1) {
    if (at_in_data(insertion->get_start()) != at_in_data(child_possibly_exists->get_start())) {
      std::cout << "check failed at " << insertion->get_start() << "=" << at_in_data(insertion->get_start())
		<< "; " << child_possibly_exists->get_start() << "=" << at_in_data(child_possibly_exists->get_start()) << std::endl;
      throw std::domain_error("safely_merge_nodes::1: detected logic error");
    }
    //make room in the child's end slot before reusing it
    handle_end_at_site(child_possibly_exists, insertion->get_start() + 1);
    //re-check: handle_end_at_site may have restructured below the child
    if (at_in_data(insertion->get_start()) != at_in_data(child_possibly_exists->get_start())) {
      throw std::domain_error("safely_merge_nodes::2: detected logic error");
    }
    //the child now refers to the most recent occurrence of its symbol
    child_possibly_exists->set_start_and_parent(insertion->get_start(), child_possibly_exists->get_parent());
    //plop the insertion in the end slot of the child
    insertion->set_start_and_parent(insertion->get_start() + 1, child_possibly_exists);
    insertion->is_end(true);
    child_possibly_exists->end(insertion);
  } else {
    //need a new internal node
    suffix_tree_node *internal_node = next_available_node();
    internal_node->is_end(false);
    //only the single newly added symbol is treated as shared here
    const unsigned shared_sequence_length = 1;
    //this internal node is the single character added to the sequence
    internal_node->set_start_and_parent(insertion->get_start(), parent);
    insertion->set_start_and_parent(insertion->get_start() + shared_sequence_length,
				    internal_node);
    insertion->is_end(true);
    internal_node->end(insertion);
    parent->force_add_child(at_in_data(internal_node->get_start()),
			    internal_node);

    //update child_possibly_exists to correspond to the sequence
    // not shared by it and insertion
    child_possibly_exists->set_start_and_parent(child_possibly_exists->get_start() +
						shared_sequence_length, internal_node);
    internal_node->force_add_child(at_in_data(child_possibly_exists->get_start()),
				   child_possibly_exists);
  }
}

// Recursively follow the tree from `node`, matching the data that begins at
// `adj_start_index` against the edges below it.
//
// node:                 node to continue from; NULL ends the search.
// start_index:          position where the overall match began; it is passed
//                       unchanged through the recursion.
// adj_start_index:      position to match next at this level.
// search_forward_bound: the search stops once adj_start_index equals this.
// match_start:          (out) start position of the match found.
// match_length:         (in/out) running match length; it is incremented
//                       here and the recursion continues from
//                       start_index + match_length.
//
// Returns start_index != adj_start_index when the search stops without
// descending (NULL node, bound reached, no usable child); otherwise the
// result of the mismatch / recursion paths below.
// Throws std::domain_error if a child is found but not a single position of
// it could be compared.
//
// Side effects: may restructure the tree via handle_end_at_site(), and
// deletes and recycles a candidate child whose start lies more than 32768
// positions behind start_index.
bool genezip_utils::suffix_tree::search(suffix_tree_node *node,
					unsigned start_index,
					unsigned adj_start_index,
					unsigned search_forward_bound,
					unsigned &match_start,
					unsigned &match_length) {
  if (!node) return start_index != adj_start_index;
  if (adj_start_index == search_forward_bound)
    return start_index != adj_start_index;
  //at the current node, check for end() and the actual node
  suffix_tree_node *end_candidate = node->end();
  suffix_tree_node *pri_candidate = NULL;
  
  node->find_child(at_in_data(adj_start_index), pri_candidate);
  // when the node's end and its regular child begin with the same symbol,
  // push the end node down first (never done for the root)
  // NOTE(review): pri_candidate is not looked up again after this call,
  // although handle_end_at_site() can insert a branch node above it.
  if (end_candidate && pri_candidate &&
      node != _root &&
      end_candidate->get_start() < size_of_data() &&
      at_in_data(end_candidate->get_start()) == 
      at_in_data(pri_candidate->get_start())) {
    handle_end_at_site(node, search_forward_bound);
  }
  //only conduct this search if the path is sufficiently recent
  // NOTE(review): 32768 is hard-coded, and the unsigned subtraction wraps
  // if the candidate's start is ahead of start_index.
  if (pri_candidate && 
      (start_index - pri_candidate->get_start() > 32768)) {
    //kill the wabbit
    if ((pri_candidate = delete_node(pri_candidate))) {
     _available.push(pri_candidate);
    }
    pri_candidate = 0;
  }
  if (pri_candidate) {
    //find out how much of this node corresponds to a match
    unsigned i = 0;
    unsigned initial_match_length = match_length;
    // compare over the candidate's length (or to the end of the data when
    // its length is 0), never reading past the end of the data
    for ( ; i < (pri_candidate->get_length() ? pri_candidate->get_length() : (size_of_data() - pri_candidate->get_start())) &&
	    adj_start_index + i < size_of_data() &&
	    pri_candidate->get_start() + i < size_of_data(); ++i) {
      if (at_in_data(adj_start_index + i) == at_in_data(pri_candidate->get_start() + i)) {
	//	std::cout << "found a match" << std::endl;
	++match_length;
      } else {
	//std::cout << "match break" << std::endl;
	match_start = pri_candidate->get_start() + i - match_length;
	// unsigned converted to bool: true iff anything has been matched
	return match_length;
      }
    }
    // i == 0 means the loop bounds excluded even the first position
    if (i) {
      match_start = pri_candidate->get_start() - initial_match_length;
      unsigned stored_match_start = match_start, stored_match_length = match_length;
      //kill the match if it has extended beyond the bleeding edge
      if (match_start + match_length > start_index) return true;//start_index != adj_start_index;
      else if (match_start + match_length == start_index) {
	//if it hits the bleeding edge and stops
	//allow it to continue past the edge, and then return the max of it and the recursive call
	
	//std::cout << "conducting an extended search" << std::endl;
	if (_raw_data) {
	  // NOTE(review): only stored_match_start + stored_match_length is
	  // bounded by size_of_data(); adj_start_index + stored_match_length
	  // is read without a bound of its own -- confirm this cannot run
	  // past the data, and that adj_start_index (rather than
	  // start_index) is the intended base.
	  for ( ; stored_match_start + stored_match_length < size_of_data(); ++stored_match_length) {
	    if (at_in_data(stored_match_start + stored_match_length) !=
		at_in_data(adj_start_index + stored_match_length)) break;
	  }
	}
      }
      // continue below the candidate from the first unmatched position
      bool subsearch = search(pri_candidate,
			      start_index, 
			      start_index + match_length,
			      search_forward_bound,
			      match_start,
			      match_length);
      //prioritize the crossing result, as it's closer to the compression boundary
      // i.e. the stored result wins unless the recursion found something
      // strictly longer
      if (_raw_data) {
	if (match_length <= stored_match_length) {
	  match_start = stored_match_start;
	  match_length = stored_match_length;
	}
      }
      return subsearch;
    }
    //return start_index != adj_start_index;
    throw std::domain_error("search: impossible condition");
  } else {
    return start_index != adj_start_index;
  }
}
