/*
--             This file is part of the New World OS project
--                 Copyright (C) 2007-2008  QRW Software
--           J. Scott Edwards - j.scott.edwards.nwos@gmail.com 
--                      http://www.qrwsoftware.com
--                      http://nwos.sourceforge.com
--
--   This program is free software: you can redistribute it and/or modify
--   it under the terms of the GNU General Public License as published by
--   the Free Software Foundation, either version 3 of the License, or
--   (at your option) any later version.
--
--   This program is distributed in the hope that it will be useful,
--   but WITHOUT ANY WARRANTY; without even the implied warranty of
--   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--   GNU General Public License for more details.
--
--   You should have received a copy of the GNU General Public License
--   along with this program, in the file LICENSE.  If not, see 
--   <http://www.gnu.org/licenses/>.
--
--   You can also contact me via paper mail at:
--
--      QRW Software
--      P.O. Box 27511
--      Salt Lake City, UT 84127-0511, USA.
--
--   This program takes a compressed file as input, scans it for chunks
--   that only have one block in them and attempts to move that block
--   into an adjacent chunk to free up that chunk.
--
-- $Log: eliminate_one_block_chunks.c,v $
-- Revision 1.21  2008/09/01 00:22:59  jsedwards
-- Fix year in copyright.  NO code changes.
--
-- Revision 1.20  2008/08/31 16:42:02  jsedwards
-- Added assert around calls to nwos_read_object_from_disk because now it
-- returns false if it fails instead of asserting itself.
--
-- Revision 1.19  2007/12/09 16:38:42  jsedwards
-- Added code to modify the used_chunks in the header when copying file.
--
-- Revision 1.18  2007/12/09 16:00:03  jsedwards
-- Fix bug where first block wasn't being written to output file.
--
-- Revision 1.17  2007/12/09 13:35:44  jsedwards
-- Added code to recalculate crc32 block checksum.
--
-- Revision 1.16  2007/12/09 00:52:06  jsedwards
-- Added code to verify files.
--
-- Revision 1.15  2007/12/08 22:44:55  jsedwards
-- Remove unused variables and code.
--
-- Revision 1.14  2007/12/08 22:39:29  jsedwards
-- Moved objectify initialization to main so it can be done again for testing.
--
-- Revision 1.13  2007/12/08 21:59:18  jsedwards
-- Added function to copy compressed file to output file changing as we go.
--
-- Revision 1.12  2007/12/08 20:53:47  jsedwards
-- Added function to encrypt the reference list blocks that have changed.
--
-- Revision 1.11  2007/12/08 18:48:27  jsedwards
-- Added function to put the new references in the reference list blocks.
--
-- Revision 1.10  2007/12/08 17:59:44  jsedwards
-- Change to store the reference list block instead of the file reference.
--
-- Revision 1.9  2007/12/08 16:47:57  jsedwards
-- Fixed find_new_locations to deal with multiple blocks in a row.
--
-- Revision 1.8  2007/12/07 15:06:08  jsedwards
-- Changed locate function to print out stats of each found block.
--
-- Revision 1.7  2007/12/06 15:19:08  jsedwards
-- First code to handle more than one block in a row.
--
-- Revision 1.6  2007/12/06 14:08:46  jsedwards
-- Added function to find location to move block - incomplete.
--
-- Revision 1.5  2007/12/02 15:50:06  jsedwards
-- Added counting the number of files checked and verify the block count is
-- correct for each file.
--
-- Revision 1.4  2007/11/30 14:15:22  jsedwards
-- Added code to log and print out the number of single blocks that are used
-- int the reference list first block and extra blocks.
--
-- Revision 1.3  2007/11/29 16:06:40  jsedwards
-- Added code to scan the remainder of each reference list.
--
-- Revision 1.2  2007/11/29 13:51:50  jsedwards
-- Added code to search the first block in a reference list for single blocks.
--
-- Revision 1.1  2007/11/29 12:19:40  jsedwards
-- Move to attic.
--
-- Revision 1.4  2007/11/29 05:36:36  jsedwards
-- Added subroutine to scan the files for the blocks found in block table.
--
-- Revision 1.3  2007/11/29 05:20:57  jsedwards
-- Took code that scanned compressed file for single block chunks and made a
-- new function find_single_block_chunks.
--
-- Revision 1.2  2007/11/28 15:22:23  jsedwards
-- Changed blocks table to store previous and subsequent refs so that single
-- block chunks in a row can be detected.
--
-- Revision 1.1  2007/11/28 14:57:15  jsedwards
-- Initial version - only scans for one block chunks.
--
*/

#include <ctype.h>
#include <openssl/blowfish.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>    /* define sleep() */

#include "../crc32.h"
#include "../objectify_private.h"


#define DATA_STORAGE_SIZE (FILE_BLOCK_SIZE - 12)   /* 4 bytes each for flags, id, checksum */


/* Statistics gathered during the run; all of them are printed by main() at the end. */
int single_block_chunks = 0;   /* number of entries recorded in blocks[] by find_single_block_chunks() */
int total_chunks = 0;          /* incremented each time the chunk number changes while scanning the file */
int blocks_found = 0;          /* single blocks that check_file() found referenced in a file's reference list */
int first_block_blocks = 0;    /* files whose block_list reference is itself one of the single blocks */
int extra_block_blocks = 0;    /* extra reference list blocks that are themselves one of the single blocks */
int files_checked = 0;         /* file objects with a non-void block_list examined by check_file() */
int files_found = 0;           /* number of entries in use in files[] */
int refs_changed = 0;          /* references rewritten by put_new_locations_in_ref_list() */

/*
 * One entry per single block chunk, in the order the blocks appear in the
 * compressed file (find_single_block() binary searches on 'single').
 */
struct
{
    uint32 previous;      /* reference of the block read just before the single block */
    uint32 single;        /* reference of the block that is alone in its chunk */
    uint32 subsequent;    /* reference of the block read just after the single block */
    uint32 new;           /* new reference chosen by find_new_locations(); 0 is treated as "not moved" by copy_file_with_changes() */
    Ref_List_First_Block* first_block;   /* in-memory copy of the first reference list block that refers to 'single', set by check_file() */
    Ref_List_Extra_Block* extra_block;   /* in-memory copy of the extra reference list block that refers to 'single', set by check_file() */
} blocks[4096];

/*
 * One entry per file whose reference list refers to at least one single
 * block; filled in by check_file(), which asserts there are fewer than 64.
 */
struct
{
    ObjRef file;          /* reference of the file object */
    int seq;              /* index into nwos_random_sequence for 'extra_block'; only assigned when the entry starts at an extra block */
    Ref_List_First_Block* first_block;   /* copy of the first reference list block, when a match was found in it */
    Ref_List_Extra_Block* extra_block;   /* copy of the first extra block with a match, when none was found in the first block */
} files[64];


void find_single_block_chunks(char* compressed_file_name)
{
    FILE* fp1;
    uint8 buf1[FILE_BLOCK_SIZE];
    Disk_Header header1;
    size_t read1;
    uint32 ref;
    uint32 prev_ref1;
    uint32 prev_ref2;
    uint32 chunk;
    uint32 prev_chunk1;
    uint32 prev_chunk2;
    int block_count;
    int block1;


    /* Open the old file and check it */

    fp1 = fopen(compressed_file_name, "r");
    if (fp1 == NULL)
    {
	perror(compressed_file_name);
	exit(1);
    }

    read1 = fread(buf1, 1, sizeof(buf1), fp1);

    if (read1 != FILE_BLOCK_SIZE)
    {
	if (ferror(fp1))
	{
	    perror(compressed_file_name);
	}
	else
	{
	    fprintf(stderr, "Unexpected end of file: %s\n", compressed_file_name);
	}
	fclose(fp1);
	exit(1);
    }

    memcpy(&header1, buf1, sizeof(header1));

    if (memcmp(header1.magic_number, "NWOS", 4) != 0)
    {
	fprintf(stderr, "Not an Objectify file: %s\n", compressed_file_name);
	fclose(fp1);
	exit(1);
    }


    read1 = fread(buf1, 1, sizeof(buf1), fp1);

    block_count = 0;
    ref = 0;
    prev_ref1 = 0;
    prev_ref2 = 0;
    chunk = 0xffffffff;
    prev_chunk1 = 0xffffffff;
    prev_chunk2 = 0xffffffff;

    while (!feof(fp1) && read1 == FILE_BLOCK_SIZE)
    {
	if (buf1[0] != 0 || buf1[1] != 0 || buf1[2] != 0 || buf1[3] != 0)
	{
	    printf("\n%s block %d - first four bytes not zero: %02x%02x%02x%02x\n",
		   compressed_file_name, block1, buf1[0], buf1[1], buf1[2], buf1[3]);
	    break;
	}

	nwos_4_uint8_to_uint32(&buf1[4], &ref);

	chunk = (ref - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;

	if (prev_chunk2 != 0xffffffff && prev_chunk2 != prev_chunk1 && prev_chunk1 != chunk)
	{
	    printf("%c  before: %08x (%05u.%05u)  single: %08x (%05u.%05u)  after: %08x (%05u.%05u)\n",
		   single_block_chunks > 0 && blocks[single_block_chunks-1].single == prev_ref2 ? '+' : '-',
		   prev_ref2, prev_chunk2, (prev_ref2 - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK,
		   prev_ref1, prev_chunk1, (prev_ref1 - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK,
		   ref,       chunk,       (ref - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK);
	    fflush(stdout);

	    blocks[single_block_chunks].previous = prev_ref2;
	    blocks[single_block_chunks].single = prev_ref1;
	    blocks[single_block_chunks].subsequent = ref;
	    single_block_chunks++;
	}

	if (prev_chunk1 != chunk)
	{
	    total_chunks++;
	}

	prev_ref2 = prev_ref1;
	prev_ref1 = ref;

	prev_chunk2 = prev_chunk1;
	prev_chunk1 = chunk;

	read1 = fread(buf1, 1, sizeof(buf1), fp1);
    }

    if (!feof(fp1) && read1 != FILE_BLOCK_SIZE)
    {
	perror(compressed_file_name);
    }

    fclose(fp1);
}


/* Convert a 5 byte, most-significant-byte-first file size into a 64 bit integer. */
static inline uint64 file_size_to_uint64(uint8 size[5])
{
    uint64 value = 0;
    int byte;

    for (byte = 0; byte < 5; byte++)
    {
        value = (value << 8) | (uint64)size[byte];   /* shift in the next lower-order byte */
    }

    return value;
}


/*
 * Binary search the blocks[] table for an entry whose 'single' reference
 * equals ref.  Returns the index of the entry, or -1 if there is none.
 * Relies on blocks[0 .. single_block_chunks-1] being in ascending order of
 * 'single'.
 */
static int find_single_block(uint32 ref)
{
    int first = 0;
    int last = single_block_chunks - 1;
    int probe;
    uint32 candidate;

    while (first <= last)
    {
        probe = (first + last) / 2;

        candidate = blocks[probe].single;

        if (candidate == ref)
        {
            return probe;
        }

        if (candidate > ref)
        {
            last = probe - 1;     /* continue in the lower half */
        }
        else
        {
            first = probe + 1;    /* continue in the upper half */
        }
    }

    return -1;
}


/*
 * Verify the checksum of reference list data.
 *
 * Computes the crc32 of data_size bytes at data and compares it with the
 * four stored checksum bytes.  On a mismatch it prints the reference, both
 * checksums and a hex dump of the data, then exits the program.
 *
 *   ref       - reference of the list block, used only in the error message
 *   data      - bytes the checksum is computed over
 *   data_size - number of bytes at data
 *   stored    - checksum bytes read from disk
 */
static void check_ref_list_data(ObjRef* ref, uint8* data, size_t data_size, uint8 stored[4])
{
    uint8 computed[4];
    size_t i;


    nwos_crc32_calculate(data, data_size, computed);

    if (memcmp(stored, computed, sizeof(computed)) == 0)
    {
        return;   /* checksum is good */
    }

    printf("reference list: %02x%02x%02x%02x - bad data checksum, ", ref->id[0], ref->id[1], ref->id[2], ref->id[3]);
    printf("size %zu - ", data_size);
    printf("computed %02x%02x%02x%02x - ", computed[0], computed[1], computed[2], computed[3]);
    printf("stored %02x%02x%02x%02x\n", stored[0], stored[1], stored[2], stored[3]);

    printf("\n");

    for (i = 0; i < data_size; i++)    /* hex dump, 16 bytes per line */
    {
        printf("%02x%c", data[i], (i % 16) == 15 ? '\n' : ' ');
    }
    printf("\n");

    exit(1);
}


/*
 * Look through the reference list of one file object for references to any
 * of the single blocks recorded in blocks[].
 *
 * The first reference list block, and then each extra block chained from
 * it, is read and decrypted.  When a reference matches a single block:
 *   - a heap copy of the list block containing it is made (once per list
 *     block) and attached to the matching blocks[] entry,
 *   - the first match in a file also adds an entry to files[],
 *   - every list block read after the first match is copied and linked on
 *     through next_block_ptr, so the chain can be re-encrypted in order.
 * Each list block's data checksum is verified (the program exits on a
 * mismatch) and the number of references is compared with the block count
 * implied by the file size.
 *
 * Exits the program if the file object cannot be read.  Always returns true.
 */
static bool check_file(ObjRef* file_ref)
{
    int i;
    uint64 file_length;
    uint32 num_blocks;
    uint32 block_count = 0;
    C_struct_File file_obj;
    uint8 ivec[IVEC_SIZE];
    Ref_List_First_Block first_block;
    Ref_List_Extra_Block extra_block;
    Ref_List_First_Block* first_block_ptr = NULL;
    Ref_List_Extra_Block* extra_block_ptr = NULL;
    int block_index;
    int seq;
    ObjRef next_ref;


    memset(ivec, 0, sizeof(ivec));

    /* read the file object - done outside of assert() so the read still happens when NDEBUG is defined */
    if (!nwos_read_object_from_disk(file_ref, &file_obj, sizeof(file_obj)))
    {
        fprintf(stderr, "Unable to read file object: %08x\n", nwos_ref_to_word(file_ref));
        exit(1);
    }

    if (is_void_reference(&file_obj.block_list))
    {
        return true;   /* no block list, nothing to check */
    }

    files_checked++;

    /* the first reference list block could itself be one of the single blocks */
    block_index = find_single_block(nwos_ref_to_word(&file_obj.block_list));

    if (block_index >= 0)
    {
        printf("block_list: %08x  file: %08x\n", blocks[block_index].single, nwos_ref_to_word(file_ref));
        first_block_blocks++;
    }

    file_length = file_size_to_uint64(file_obj.size);

    num_blocks = (file_length + DATA_STORAGE_SIZE - 1) / DATA_STORAGE_SIZE;

    memset(&first_block, 0, sizeof(first_block));

    nwos_read_object_from_disk_and_decrypt(&file_obj.block_list, &first_block.list, FILE_BLOCK_SIZE, ivec, nwos_random_sequence[0]);

    for (i = 0; i < MAX_REFS_IN_REF_LIST; i++)
    {
        if (is_void_reference(&first_block.list.references[i])) break;

        block_index = find_single_block(nwos_ref_to_word(&first_block.list.references[i]));

        if (block_index >= 0)
        {
            assert(blocks[block_index].first_block == NULL && blocks[block_index].extra_block == NULL);

            if (first_block_ptr == NULL)   /* this is the first one we've found */
            {
                assert(files_found < 64);

                copy_reference(&files[files_found].file, file_ref);

                first_block_ptr = malloc(sizeof(first_block));

                assert(first_block_ptr != NULL);

                memcpy(first_block_ptr, &first_block, sizeof(first_block));

                files[files_found].first_block = first_block_ptr;

                files_found++;
            }

            blocks[block_index].first_block = first_block_ptr;

            printf("block: %08x  first_block: %p\n", blocks[block_index].single, blocks[block_index].first_block);

            blocks_found++;
        }

        block_count++;
    }

    check_ref_list_data(&file_obj.block_list,
                        (uint8*) first_block.refs,
                        (i + 1) * sizeof(ObjRef),                 /* include void or next_block_ref */
                        first_block.list.common_header.data_chksum);

    if (i == MAX_REFS_IN_REF_LIST && !is_void_reference(&first_block.next_block_ref))   /* more than one block */
    {
        copy_reference(&extra_block.next_block_ref, &first_block.next_block_ref);

        seq = 1;    /* use different sequence tables for each block in turn */

        while (!is_void_reference(&extra_block.next_block_ref))    /* more blocks in this list */
        {
            copy_reference(&next_ref, &extra_block.next_block_ref);

            /* the extra reference list block could itself be one of the single blocks */
            block_index = find_single_block(nwos_ref_to_word(&next_ref));

            if (block_index >= 0)
            {
                printf("ref_list: %08x  file: %08x\n", blocks[block_index].single, nwos_ref_to_word(file_ref));
                extra_block_blocks++;
            }

            memset(&extra_block, 0, sizeof(extra_block));

            /* save this ivec in case this block has to be written back to disk; */
            /* sizeof(ivec) is the whole array (sizeof of its address would only be pointer sized) */
            memcpy(&extra_block.ivec, ivec, sizeof(ivec));
            nwos_read_object_from_disk_and_decrypt(&next_ref,
                                                   &extra_block.dirty,
                                                   FILE_BLOCK_SIZE,
                                                   ivec,
                                                   nwos_random_sequence[seq]);

            if (first_block_ptr != NULL || extra_block_ptr != NULL)   /* already found one in this list */
            {
                if (extra_block_ptr == NULL)   /* there was not a previous extra block */
                {
                    assert(first_block_ptr->next_block_ptr == NULL);

                    first_block_ptr->next_block_ptr = malloc(sizeof(extra_block));

                    assert(first_block_ptr->next_block_ptr != NULL);

                    extra_block_ptr = first_block_ptr->next_block_ptr;
                }
                else   /* there was a previous block */
                {
                    assert(extra_block_ptr->next_block_ptr == NULL);

                    extra_block_ptr->next_block_ptr = malloc(sizeof(extra_block));

                    assert(extra_block_ptr->next_block_ptr != NULL);

                    extra_block_ptr = extra_block_ptr->next_block_ptr;
                }

                memcpy(extra_block_ptr, &extra_block, sizeof(extra_block));
            }

            for (i = 0; i < MAX_REFS_IN_SIDECAR; i++)
            {
                if (is_void_reference(&extra_block.refs[i])) break;

                block_index = find_single_block(nwos_ref_to_word(&extra_block.refs[i]));

                if (block_index >= 0)
                {
                    assert(blocks[block_index].first_block == NULL && blocks[block_index].extra_block == NULL);

                    if (extra_block_ptr == NULL)   /* this is the first one we've found */
                    {
                        assert(files_found < 64);

                        copy_reference(&files[files_found].file, file_ref);
                        files[files_found].seq = seq;

                        extra_block_ptr = malloc(sizeof(extra_block));

                        assert(extra_block_ptr != NULL);

                        memcpy(extra_block_ptr, &extra_block, sizeof(extra_block));

                        files[files_found].extra_block = extra_block_ptr;

                        files_found++;
                    }

                    assert(blocks[block_index].extra_block == NULL);

                    blocks[block_index].extra_block = extra_block_ptr;

                    printf("block: %08x  extra_block: %p\n", blocks[block_index].single, blocks[block_index].extra_block);

                    blocks_found++;
                }

                block_count++;
            }

            check_ref_list_data(&next_ref, (uint8*) extra_block.refs, (i + 1) * sizeof(ObjRef), extra_block.checksum);

            if (i < MAX_REFS_IN_SIDECAR)   /* list ended inside this block, so there is no next block */
            {
                void_reference(&extra_block.next_block_ref);
            }

            seq = (seq + 1) % NUM_STORED_SEQ;    /* wrap around if we've used all of them */
        }
    }

    if (block_count != num_blocks)
    {
        printf("Block count mismatch - computed %u  calculated: %u\n", num_blocks, block_count);
    }

    return true;
}


/* this function takes the blocks that were found and figures out where to move them. */
void find_new_locations()
{
    int i;
    int j;
    int k;
    int in_a_row;
    uint32 offset;
    uint32 base;
    uint32 space;
    uint32 chunk_before;
    uint32 space_before;
    uint32 dist_before;
    uint32 chunk;
    uint32 chunk_after;
    uint32 space_after;
    uint32 dist_after;
    uint32 new;
    bool next_block;


    in_a_row = 0;

    for (i = 0; i < single_block_chunks; i++)
    {
	next_block = false;

	if (blocks[i].first_block != NULL || blocks[i].extra_block != NULL)
	{
	    if (in_a_row == 0)
	    {
		chunk_before = (blocks[i].previous - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;
		space_before = USABLE_BLOCKS_PER_CHUNK - (blocks[i].previous - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK - 1;

		chunk_after = 0;
		space_after = 0;
	    }

	    chunk = (blocks[i].single - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;
	    offset = (blocks[i].single - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK;

	    chunk_after = (blocks[i].subsequent - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;
	    space_after = (blocks[i].subsequent - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK;

	    next_block = (i+1 < single_block_chunks && blocks[i].single == blocks[i+1].previous);
	    if (next_block && blocks[i+1].first_block == NULL && blocks[i+1].extra_block == NULL)
	    {
		next_block = false;   /* the next chuck wasn't found */
	    }

	    printf("%08x %u (%u) - %u (%u) - %u (%u) %c\n",
		   blocks[i].single, chunk_before, space_before, chunk, offset, chunk_after, space_after, next_block ? '+' : '-');

	    if (next_block)
	    {
		in_a_row++;
	    }
	    else
	    {
		for (j = i - in_a_row; j <= i; j++)
		{
		    if (space_before == 0)       /* there is no space before */
		    {
			dist_before = 1;            /* always use after */
			dist_after = 0;
		    }
		    else if (space_after == 0)   /* there is no space after */
		    {
			dist_before = 0;            /* always use after */
			dist_after = 1;
		    }
		    else
		    {
			dist_before = blocks[j].single - blocks[i - in_a_row].previous;
			dist_after = blocks[i].subsequent - blocks[j].single;
		    }

		    if (dist_before <= dist_after)
		    {
			base = RESERVED_PUBLIC_BLOCKS + (chunk_before + 1) * USABLE_BLOCKS_PER_CHUNK - space_before;
			space = space_before;
		    }
		    else
		    {
			base = RESERVED_PUBLIC_BLOCKS + chunk_after * USABLE_BLOCKS_PER_CHUNK;
			space = space_after;
		    }

		    new = base + random() % space;

		    for (k = j; k > i - in_a_row; k--)
		    {
			if (blocks[k-1].new <= new) break;

			blocks[k].new = blocks[k-1].new;
		    }

		    if (blocks[k-1].new == new)   /* duplicate */
		    {
			j--;   /* make it try this one again */
		    }
		    else
		    {
			blocks[k].new = new;
		    }
		}

		for (j = i - in_a_row; j <= i; j++)
		{
		    chunk = (blocks[j].single - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;
		    offset = (blocks[j].single - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK;

		    chunk_after = (blocks[j].new - RESERVED_PUBLIC_BLOCKS) / USABLE_BLOCKS_PER_CHUNK;
		    space_after = (blocks[j].new - RESERVED_PUBLIC_BLOCKS) % USABLE_BLOCKS_PER_CHUNK;

		    printf("   %d: %08x %u (%u) -> %08x %u (%u)\n", j, 
			   blocks[j].single, chunk, offset,
			   blocks[j].new, chunk_after, space_after);

		    fflush(stdout);

		    if (j > i - in_a_row)
		    {
			assert(blocks[j-1].new < blocks[j].new);
		    }
		    else
		    {
			assert(blocks[j].previous < blocks[j].new);
		    }
		}

		in_a_row = 0;
	    }
	}
    }
}


/* this function puts the new references in the reference list blocks that were found and. */
void put_new_locations_in_ref_list()
{
    int i;
    int j;

    for (i = 0; i < single_block_chunks; i++)
    {
	if (blocks[i].first_block != NULL)
	{
	    for (j = 0; j < MAX_REFS_IN_REF_LIST; j++)
	    {
		if (nwos_ref_to_word(&blocks[i].first_block->refs[j]) == blocks[i].single) break;
	    }

	    assert(j < MAX_REFS_IN_REF_LIST);

	    nwos_word_to_ref(blocks[i].new, &blocks[i].first_block->refs[j]);

	    printf("first %p %d: %08x -> %08x\n", blocks[i].first_block, j, blocks[i].single, blocks[i].new);

	    refs_changed++;
	}
	else if (blocks[i].extra_block != NULL)
	{
	    for (j = 0; j < MAX_REFS_IN_SIDECAR; j++)
	    {
		if (nwos_ref_to_word(&blocks[i].extra_block->refs[j]) == blocks[i].single) break;
	    }

	    assert(j < MAX_REFS_IN_SIDECAR);

	    nwos_word_to_ref(blocks[i].new, &blocks[i].extra_block->refs[j]);

	    blocks[i].extra_block->dirty = -1;

	    printf("extra %p %d: %08x -> %08x\n", blocks[i].extra_block, j, blocks[i].single, blocks[i].new);

	    refs_changed++;
	}
    }
}


/* Blowfish key defined outside this file; encrypt_block() passes it to BF_cbc_encrypt(). */
extern BF_KEY blowfish_key;


/*
 * Encrypt one block in place.
 *
 * Bytes 8 and up of block are first scattered into a scratch buffer at the
 * positions given by seq_table, then everything from offset 8 of the scratch
 * buffer is Blowfish CBC encrypted back into block starting at offset 8.
 * The first 8 bytes of block are left as they were.  ivec is passed to
 * BF_cbc_encrypt, so it carries the chaining state on to the next call.
 */
void encrypt_block(uint8 block[FILE_BLOCK_SIZE], uint8 ivec[IVEC_SIZE], uint8 seq_table[FILE_BLOCK_SIZE])
{
    uint8 scrambled[FILE_BLOCK_SIZE];
    int src;

    for (src = 8; src < FILE_BLOCK_SIZE; src++)
    {
        scrambled[seq_table[src]] = block[src];    /* seq_table says where this byte is stored */
    }

    /* and encrypt the remainder 8 bytes at a time */
    BF_cbc_encrypt(scrambled + 8, block + 8, FILE_BLOCK_SIZE - 8, &blowfish_key, ivec, BF_ENCRYPT);
}


void encrypt_reference_lists()
{
    int i;
    int j;
    int seq;
    int blocks_changed = 0;
    Ref_List_Extra_Block* extra_block;
    uint8 ivec[IVEC_SIZE];

    for (i = 0; i < files_found; i++)
    {
	printf("file: %08x:\n", nwos_ref_to_word(&files[i].file));
	fflush(stdout);

	if (files[i].first_block != NULL)
	{
	    for (j = 0; j < MAX_REFS_IN_REF_LIST; j++)
	    {
		if (is_void_reference(&files[i].first_block->refs[j])) break;
	    }

	    assert(j > 0);

	    nwos_crc32_calculate((uint8*) &files[i].first_block->refs, 
				 (j + 1) * sizeof(ObjRef),
				 files[i].first_block->list.common_header.data_chksum);

	    memset(ivec, 0, sizeof(ivec));
	    encrypt_block((uint8*)&files[i].first_block->list, ivec, nwos_random_sequence[0]);
	    seq = 1;
	    extra_block = files[i].first_block->next_block_ptr;

	    printf("  first_block: %08x\n", nwos_ref_to_word(&files[i].first_block->list.common_header.id));
	    blocks_changed++;
	}
	else
	{
	    seq = files[i].seq;
	    extra_block = files[i].extra_block;
	    memcpy(ivec, extra_block->ivec, sizeof(ivec));
	}

	while (extra_block != NULL)
	{
	    if (extra_block->dirty)
	    {
		for (j = 0; j < MAX_REFS_IN_SIDECAR; j++)
		{
		    if (is_void_reference(&extra_block->refs[j])) break;
		}

		assert(j > 0);

		nwos_crc32_calculate((uint8*) &extra_block->refs, (j + 1) * sizeof(ObjRef), extra_block->checksum);

		printf("  changed: %08x\n", nwos_ref_to_word(&extra_block->id));

		extra_block->dirty = 0;
	    }
	    else
	    {
		printf("  unchanged: %08x\n", nwos_ref_to_word(&extra_block->id));
	    }

	    encrypt_block((uint8*)&extra_block->dirty, ivec, nwos_random_sequence[seq]);
	    seq = (seq + 1) % NUM_STORED_SEQ;    /* wrap around if we've used all of them */
	    extra_block = extra_block->next_block_ptr;
	    blocks_changed++;
	}

	printf("blocks changed: %d\n", blocks_changed);
	blocks_changed = 0;
    }
}


/*
 * Run check_file() on every object in the class definition's reference
 * list whose class is the FILE class.
 */
static void check_files_of_class_definition(C_struct_Class_Definition* class_def, ObjRef* file_class_ref)
{
    ObjRef object_class;
    ReferenceList* ref_list;
    int num_refs;
    int i;

    ref_list = nwos_malloc_reference_list(&class_def->header.object.references);

    num_refs = ref_list->common_header.num_refs;

    printf("num_refs: %d\n", num_refs);

    for (i = 0; i < num_refs; i++)
    {
        nwos_get_object_class(&ref_list->references[i], &object_class);

        if (is_same_object(&object_class, file_class_ref))
        {
            check_file(&ref_list->references[i]);
        }
    }

    nwos_free_reference_list(ref_list);
}


/*
 * Check every file object for references to the single blocks.
 *
 * Looks up the private FILE class definition and checks the files it
 * references; if that class definition has a previous version, the files
 * referenced by that one version are checked as well.  Exits the program
 * if the FILE class definition cannot be found.
 *
 * Returns false (for now the scan is only done once).
 */
bool scan_for_matching_blocks()
{
    C_struct_Class_Definition class_def_obj;
    ObjRef file_class_ref;


    /* done outside of assert() so the lookup still happens when NDEBUG is defined */
    if (!nwos_find_private_class_definition("FILE", &file_class_ref))
    {
        fprintf(stderr, "Unable to find private class definition: FILE\n");
        exit(1);
    }

    nwos_read_class_definition(&file_class_ref, &class_def_obj);

    check_files_of_class_definition(&class_def_obj, &file_class_ref);

    if (!is_void_reference(&class_def_obj.header.object.prev_version))
    {
        nwos_read_class_definition(&class_def_obj.header.object.prev_version, &class_def_obj);

        check_files_of_class_definition(&class_def_obj, &file_class_ref);
    }

    return false;  /* for now only do once */
}


/*
 * Search the saved reference list blocks of every file in files[] for the
 * block whose id is ref.  Returns a pointer to the start of that block's
 * disk image (the 'list' member of a first block, the 'dirty' member of an
 * extra block), or NULL if ref is not one of the saved blocks.
 */
void* find_block_in_ref_list(uint32 ref)
{
    int file_index;
    Ref_List_First_Block* head;
    Ref_List_Extra_Block* node;

    for (file_index = 0; file_index < files_found; file_index++)
    {
        head = files[file_index].first_block;

        if (head == NULL)
        {
            node = files[file_index].extra_block;    /* chain starts at an extra block */
        }
        else
        {
            if (nwos_ref_to_word(&head->list.common_header.id) == ref)
            {
                return &head->list;
            }

            node = head->next_block_ptr;
        }

        for ( ; node != NULL; node = node->next_block_ptr)
        {
            if (nwos_ref_to_word(&node->id) == ref)
            {
                return &node->dirty;
            }
        }
    }

    return NULL;
}


/* Write one FILE_BLOCK_SIZE block to the output file; report the error and exit if it can't be written. */
static void write_output_block(FILE* fp, const void* block, const char* file_name)
{
    if (fwrite(block, 1, FILE_BLOCK_SIZE, fp) != FILE_BLOCK_SIZE)
    {
        perror(file_name);
        exit(1);
    }
}


/*
 * Copy the compressed file to the output file, applying the changes.
 *
 *   - the header's used_chunks count is reduced by blocks_found,
 *   - a block that was given a new location is written with the new
 *     reference in bytes 4-7,
 *   - a reference list block that was re-encrypted is replaced by the
 *     saved in-memory copy,
 *   - every other block is copied unchanged.
 *
 * Copying stops early (with a message) at a block whose first four bytes
 * are not zero.  The program exits if either file cannot be opened, the
 * input is not an Objectify file, or a write to the output fails.
 */
void copy_file_with_changes(char* compressed_file_name, char* output_file_name)
{
    FILE* fp1;
    FILE* fp2;
    uint8 buf1[FILE_BLOCK_SIZE];
    Disk_Header header1;
    size_t read1;
    uint32 ref;
    uint32 chunks_used;
    int block_count;
    int chunk_index;
    uint8* block;


    /* Open the old file and check it (binary mode: the contents are raw blocks) */

    fp1 = fopen(compressed_file_name, "rb");
    if (fp1 == NULL)
    {
        perror(compressed_file_name);
        exit(1);
    }

    read1 = fread(buf1, 1, sizeof(buf1), fp1);

    if (read1 != FILE_BLOCK_SIZE)
    {
        if (ferror(fp1))
        {
            perror(compressed_file_name);
        }
        else
        {
            fprintf(stderr, "Unexpected end of file: %s\n", compressed_file_name);
        }
        fclose(fp1);
        exit(1);
    }

    memcpy(&header1, buf1, sizeof(header1));

    if (memcmp(header1.magic_number, "NWOS", 4) != 0)
    {
        fprintf(stderr, "Not an Objectify file: %s\n", compressed_file_name);
        fclose(fp1);
        exit(1);
    }


    /* modify the chunks used count in the header */

    nwos_4_uint8_to_uint32(header1.used_chunks, &chunks_used);

    printf("Changing used_chunks from %u to %u\n", chunks_used, chunks_used - blocks_found);

    chunks_used -= blocks_found;

    nwos_uint32_to_4_uint8(&chunks_used, header1.used_chunks);

    memcpy(buf1, &header1, sizeof(header1));


    /* Open the new file */

    fp2 = fopen(output_file_name, "wb");
    if (fp2 == NULL)
    {
        perror(output_file_name);
        fclose(fp1);
        exit(1);
    }

    write_output_block(fp2, buf1, output_file_name);    /* the modified header */


    read1 = fread(buf1, 1, sizeof(buf1), fp1);

    block_count = 1;
    ref = 0;

    while (!feof(fp1) && read1 == FILE_BLOCK_SIZE)
    {
        if (buf1[0] != 0 || buf1[1] != 0 || buf1[2] != 0 || buf1[3] != 0)
        {
            printf("\n%s block %d - first four bytes not zero: %02x%02x%02x%02x\n",
                   compressed_file_name, block_count, buf1[0], buf1[1], buf1[2], buf1[3]);
            break;
        }

        nwos_4_uint8_to_uint32(&buf1[4], &ref);

        /* first see if this is one of the blocks that moved */
        chunk_index = find_single_block(ref);

        if (chunk_index > -1 && blocks[chunk_index].new != 0)
        {
            nwos_uint32_to_4_uint8(&blocks[chunk_index].new, &buf1[4]);

            write_output_block(fp2, buf1, output_file_name);

            printf("Moved: %08x -> %02x%02x%02x%02x\n", ref, buf1[4], buf1[5], buf1[6], buf1[7]);
        }
        else
        {
            block = find_block_in_ref_list(ref);

            if (block != NULL)
            {
                write_output_block(fp2, block, output_file_name);    /* write the newly encrypted block instead */

                printf("Changed: %08x\n", ref);
            }
            else
            {
                write_output_block(fp2, buf1, output_file_name);
            }
        }

        block_count++;

        read1 = fread(buf1, 1, sizeof(buf1), fp1);
    }

    if (!feof(fp1) && read1 != FILE_BLOCK_SIZE)
    {
        perror(compressed_file_name);
    }

    fclose(fp1);

    if (fclose(fp2) != 0)    /* fclose flushes, so a failure here means data was not written */
    {
        perror(output_file_name);
        exit(1);
    }
}


int main(int argc, char* argv[])
{
    uint8 big_key[16 + 8 + 4];
    uint8 bf_key[16];
    uint32 linear;
    uint32 serial;
    ObjRef root_object_ref;
    ObjRef assoc_class_ref;
    ObjRef object_class;
    C_struct_Class_Definition class_def_obj;
    C_struct_Path_And_File_Association assoc_obj;
    ReferenceList* ref_list;
    int num_refs;
    int i;
    int j;
    char path[128];

    if (argc != 3)
    {
	fprintf(stderr, "usage: %s old_file new_file\n", argv[0]);
	exit(1);
    }

    nwos_log_arguments(argc, argv);

    nwos_get_key_from_password(big_key, sizeof(big_key));

    memcpy(bf_key, big_key, 16);
    linear = ((uint32)big_key[16] << 24) | ((uint32)big_key[17] << 16) | ((uint32)big_key[18] << 8) | (uint32)big_key[19];
    memcpy(root_object_ref.id, big_key+20, 4);
    serial = ((uint32)big_key[24] << 24) | ((uint32)big_key[25] << 16) | ((uint32)big_key[26] << 8) | (uint32)big_key[27];

    /* read the compressed file and find the chunks with only one block */
    find_single_block_chunks(argv[1]);

    /* fire up objectify and see which of those single blocks we can find in the files */
    nwos_initialize_objectify(bf_key, linear, serial,   Compressed_File_RO, argv[1]);

    nwos_set_root_object(&root_object_ref);

    scan_for_matching_blocks();

    /* find new locations for the blocks we matched up */
    find_new_locations();
    put_new_locations_in_ref_list();

    /* and re-encrypt the modified reference list blocks */
    encrypt_reference_lists();

    /* shut down objectify for now */
    nwos_terminate_objectify();

    /* copy the file making the changes as we go */
    copy_file_with_changes(argv[1], argv[2]);

    printf("\nVerifying files:\n");
    fflush(stdout);

    /* export the modified files */
    nwos_initialize_objectify(bf_key, linear, serial,   Compressed_File_RO, argv[2]);

    nwos_set_root_object(&root_object_ref);

    assert(nwos_find_private_class_definition("PATH AND FILE ASSOCIATION", &assoc_class_ref));
    
    nwos_read_class_definition(&assoc_class_ref, &class_def_obj);

    ref_list = nwos_malloc_reference_list(&class_def_obj.header.object.references);

    num_refs = ref_list->common_header.num_refs;

    printf("num_refs: %d\n", num_refs);

    for (i = 0; i < num_refs; i++)
    {
	nwos_get_object_class(&ref_list->references[i], &object_class);

	if (is_same_object(&object_class, &assoc_class_ref))
	{
	    assert(nwos_read_object_from_disk(&ref_list->references[i], &assoc_obj, sizeof(assoc_obj)));

	    for (j = 0; j < files_found; j++)
	    {
		if (is_same_object(&assoc_obj.file, &files[j].file))
		{
		    nwos_file_path_to_string(&assoc_obj.path, path, sizeof(path));
		    printf("Restoring file: %08x - %s\n", nwos_ref_to_word(&files[j].file), path);
		    nwos_restore_file(&ref_list->references[i], path);
		    break;
		}
	    }
	}
    }

    nwos_terminate_objectify();


    printf("\n");

    printf("Total chunks: %d\n", total_chunks);
    printf("Single block chunks: %d\n", single_block_chunks);
    printf("Files checked: %d\n", files_checked);
    printf("Files found: %d\n", files_found);
    printf("Blocks found: %d\n", blocks_found);
    printf("Refs changed: %d\n", refs_changed);
    printf("First block found: %d\n", first_block_blocks);
    printf("Extra block found: %d\n", extra_block_blocks);

    return 0;
}
