#! /bin/bash

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: indexes the document collation using Xapian Omega's omindex

# Usage: 
#    * The current working directory must be this script's directory
#    * Arguments
#      1: configuration directory.  Required
#      2: log directory.  Required
#      3: log file.  Required when logging (no tty or $SET_HAVE_TTY_FALSE is true)
#    * Automatically outputs to log if there is no tty to log to
#    * To force output to log: export SET_HAVE_TTY_FALSE=true

# Function call tree
#    +
#    |
#    +-- initialise
#    |   |
#    |   +-- parse_omindex_sh_cfg
#    |   |
#    |   +-- randomise_filter_choice
#    |
#    +-- index
#    |
#    +-- finalise
#
# Utility functions called from various places (not defined in this file;
# presumably provided by the sourced bash_lib.sh):
#    ck_cmd ck_file msg my_readlink

# Function definitions in alphabetical order.  Execution begins after the last function definition.

#--------------------------
# Name: finalise
# Purpose: final logging and get out of here
# Arguments:
#    1: requested exit code.  129, 130, 131 and 143 are additionally logged
#       as finalising on SIGHUP, SIGINT, SIGQUIT and SIGTERM respectively
# Global variables read:
#    global_error_flag, global_warning_flag, have_tty, my_nam,
#    omindex_log_created, omindex_log_fn,
#    parse_cfg_for_bash_rb_log_created, parse_cfg_for_bash_rb_log_fn
#    (flags are treated as true when non-empty)
# Exit code: the requested one, raised to 1 when it is less than 1 and a
#    warning or error flag is set
#--------------------------
function finalise {

    local rc signal_name summary

    rc=${1:-0}

    # When "not logging", delete temporary logs
    if [[ ${have_tty:-} ]]; then
        if [[ ${parse_cfg_for_bash_rb_log_created:-} ]]; then
            rm -f -- "$parse_cfg_for_bash_rb_log_fn"
        fi
        if [[ ${omindex_log_created:-} ]]; then
            rm -f -- "$omindex_log_fn"
        fi
    fi

    # Final logging
    # ~~~~~~~~~~~~~
    signal_name=
    case $rc in
        129 ) signal_name=SIGHUP ;;
        130 ) signal_name=SIGINT ;;
        131 ) signal_name=SIGQUIT ;;
        143 ) signal_name=SIGTERM ;;
    esac
    if [[ $signal_name != '' ]]; then
        msg I "$my_nam: finalising on $signal_name"
    fi

    summary=
    if [[ ${global_warning_flag:-} ]]; then
        summary+=$'\n'"  There was at least one WARNING"
    fi
    if [[ ${global_error_flag:-} ]]; then
        summary+=$'\n'"  There was at least one ERROR"
    fi
    if [[ $summary != '' ]]; then
        # A warning or error must never be hidden behind a zero exit code
        [[ $rc -lt 1 ]] && rc=1
        msg I "Error and warning summary:$summary"
    fi
    msg I "$my_nam: exiting with return code $rc"

    exit $rc

}  # end of function finalise

#--------------------------
# Name: initialise
# Purpose: sets up environment, parses command line, sets up logging and parses the config file
# Arguments:
#    1: configuration directory.  Required
#    2: log directory.  Required
#    3: log file.  Required when there is no tty (or $SET_HAVE_TTY_FALSE is set)
# Global variables set:
#    cfg_dir, collation_root_dir, emsg, filter_option_argument, have_tty
#    (only when overridden), log_dir, parse_cfg_for_bash_rb_log_created,
#    parse_cfg_for_bash_rb_log_fn, using_unoconv_wrapper
# Environment variables exported: JAVA_OPTS and UNOCONV_WRAPPER_LOG (each
#    only when relevant)
# Notes:
#    * Flags are tested for being non-empty; $true and $false are
#      presumably set by bash_lib.sh -- confirm there
#    * index_db_dir, filters and java_opts are expected to be set by
#      parse_omindex_sh_cfg, which is not defined in this file
#    * Calls finalise (which exits) on any error
#--------------------------
function initialise {

    local bash_lib buf buf2 cfg_fn extra_log_text i log_fn my_cfg_fn my_log_dir now

    # Source the bash library
    # ~~~~~~~~~~~~~~~~~~~~~~~
    bash_lib=./bash_lib.sh
    source "$bash_lib"
    if [[ $? -ne 0 ]]; then
        echo "Unable to read the bash library, '$bash_lib'. Exiting" >&2
        exit 1
    fi

    # Override tty status
    # ~~~~~~~~~~~~~~~~~~~
    [[ ${SET_HAVE_TTY_FALSE:-$false} ]] && have_tty=$false

    # Parse command line
    # ~~~~~~~~~~~~~~~~~~
    # Has to be done now to determine log directory
    # When run from run_scripts.sh, any output goes into its log
    if [[ ${1:-} = '' ]]; then
        msg E "$my_nam: mandatory configuration directory argument missing or empty"
        finalise 1
    fi
    cfg_dir=${1%/}/
    if [[ ${2:-} = '' ]]; then
        msg E "$my_nam: mandatory output directory argument missing or empty"
        finalise 1
    fi
    log_dir=${2%/}/
    if [[ ! $have_tty ]]; then
        if [[ ${3:-} = '' ]]; then
            msg E "$my_nam: mandatory log file name argument missing or empty"
            finalise 1
        fi
        log_fn=$3
    fi

    # Check directory permissions
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ck_file "$log_dir" d:rwx: || finalise 1
    if [[ ! $have_tty ]]; then
        # Directory part of the log file name; a name with no directory
        # part means the current directory.  The validated log_dir is left
        # untouched
        if [[ $log_fn = */* ]]; then
            my_log_dir=${log_fn%/*}/
        else
            my_log_dir=./
        fi
        ck_file "$my_log_dir" d:rwx: || finalise 1
    else
        log_dir=/tmp/    # Required for omindex and parse_cfg_for_bash.rb
    fi

    # Set up output redirection and logging
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ ! $have_tty ]]; then
        exec 1>>"$log_fn"
        exec 2>>"$log_fn"
    else
        exec 1>/dev/tty
        exec 2>/dev/tty
    fi
    msg I "$my_nam: started by: $0 $*"

    # Parse the common configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # emsg accumulates configuration errors from here on; they are all
    # reported together further down
    emsg=
    cfg_fn=${cfg_dir}collate.cfg
    ck_file "$cfg_fn" f:r: || finalise 1
    now=$( date +'%y-%m-%d@%H:%M' )
    parse_cfg_for_bash_rb_log_fn=${log_dir}parse_cfg_for_bash.rb.$now.log
    buf=$( ./parse_cfg_for_bash.rb --config "$cfg_fn" --log "$parse_cfg_for_bash_rb_log_fn" 2>&1 )
    parse_cfg_for_bash_rb_log_created=$true
    if [[ ! $buf =~ ^Parameters ]]; then
        cat "$parse_cfg_for_bash_rb_log_fn"
        finalise 1
    fi
    buf=$( printf '%s\n' "$buf" | grep 'CollationRootDir: ' \
        | sed --regexp-extended -e 's/  CollationRootDir: //' -e 's/[[:space:]]*$//' \
    )
    collation_root_dir=$buf
    if [[ $collation_root_dir = '' ]]; then
        emsg="$emsg"$'\n'"  $cfg_fn: CollationRootDir keyword not found or has no value"
    fi

    # Parse this script's configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # emsg is deliberately not reset here so a CollationRootDir error found
    # above is still reported
    my_cfg_fn=${cfg_dir}omindex.sh.cfg
    ck_file "$my_cfg_fn" f:r: || finalise 1
    parse_omindex_sh_cfg "$my_cfg_fn" || finalise 1
    if [[ ${index_db_dir:-} = '' ]]; then
        emsg="$emsg"$'\n'"  $my_cfg_fn: keyword 'omega index database directory' missing or has no value"
    fi

    # Incorporate any filters from the configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    using_unoconv_wrapper=$false
    if [[ ${filters:-} = '' ]]; then
        filter_option_argument=
    else
        i=0
        while read -r
        do
            # An associative array is not used because they were relatively
            # new at the time of writing so many systems would not have a
            # version of bash that supports them
            filter_option_argument[i++]=$REPLY
        done <<< "$( printf '%s\n' "$filters" )"    # $( ) strips trailing newlines
        randomise_filter_choice
    fi

    # Check directory permissions
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Skipped for values already reported as missing
    if [[ $collation_root_dir != '' ]]; then
        buf=$( ck_file "$collation_root_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  $cfg_fn: keyword CollationRootDir "$'\n'"    $buf"
    fi
    if [[ ${index_db_dir:-} != '' ]]; then
        buf=$( ck_file "$index_db_dir" d:wx: 2>&1 )
        if [[ $buf != '' ]]; then
            # The index directory itself is not usable; accept a usable
            # parent directory instead.  NOTE(review): the parent is only
            # derived correctly when index_db_dir ends with a / -- confirm
            # that parse_omindex_sh_cfg normalises it
            buf2=$( ck_file "${index_db_dir%/*/}/" d:wx: 2>&1 )
            if [[ $buf2 != '' ]]; then
                emsg="$emsg"$'\n'"  $my_cfg_fn: keyword 'omega index database directory': "$'\n'"    $buf"$'\n'"    $buf2"
            fi
        fi
    fi

    # Report any configuration errors
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ $emsg != '' ]]; then
        msg E "$my_nam: configuration file(s) errors:$emsg"
        finalise 1
    fi

    # Set environment variables for called programs
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if [[ ${java_opts:-} != '' ]]; then
        export JAVA_OPTS=$java_opts
    fi
    if [[ $using_unoconv_wrapper ]]; then
        export UNOCONV_WRAPPER_LOG=${log_dir}unoconv_wrapper.$now.log
    fi

    # Log configuration values
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    extra_log_text=
    if [[ ${java_opts:-} != '' ]]; then
        extra_log_text+=$'\n'"  JAVA_OPTS: $JAVA_OPTS"
    fi
    if [[ ${filter_option_argument:-} != '' ]]; then
        for (( i=0; i<${#filter_option_argument[*]}; i++ ))
        do
            extra_log_text+=$'\n'"  Filter: ${filter_option_argument[i]}"
        done
    fi
    if [[ $using_unoconv_wrapper ]]; then
        extra_log_text+=$'\n'"  \$UNOCONV_WRAPPER_LOG for unoconv_wrapper.sh: $UNOCONV_WRAPPER_LOG"
    fi
    msg I "$my_nam: configuration values:
  Configuration directory: $cfg_dir $( my_readlink "$cfg_dir" )
  Log directory: $log_dir $( my_readlink "$log_dir" )
  Collation root directory: $collation_root_dir $( my_readlink "$collation_root_dir" )
  Index database directory: $index_db_dir $( my_readlink "$index_db_dir" )$extra_log_text"

}  # end of function initialise

#--------------------------
# Name: index
# Purpose: indexes the document collation
# Global variables read:
#    cfg_dir, collation_root_dir, filter_option_argument, have_tty,
#    index_db_dir, log_dir, my_nam
# Global variables set:
#    omindex_log_created, omindex_log_fn (both used by finalise)
# Notes:
#    * omindex's stdout and stderr go to a dedicated log which is then
#      passed to ./analyse_omindex_log.sh
#    * Calls finalise (which exits) when the index directory check or the
#      log analysis fails
#--------------------------
function index {
    local analyse_args buf cmd cmd_text i now

    msg I "$my_nam: starting indexing (index database directory: $index_db_dir)"
    ck_file "$index_db_dir" d:rw: || finalise 1

    # Build the omindex command as an array
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # One --filter option is added per configured filter.  Building a single
    # array avoids expanding a possibly empty array and keeps the logged
    # command identical to the one that is run
    cmd=( omindex --db "$index_db_dir" )
    if [[ ${filter_option_argument:-} != '' ]]; then
        for (( i=0; i<${#filter_option_argument[*]}; i++ ))
        do
            cmd+=( --filter "${filter_option_argument[i]}" )
        done
    fi
    cmd+=( --stemmer=english --url / "$collation_root_dir" )

    # Run omindex, redirecting all output to a dedicated log
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    now=$( date +'%y-%m-%d@%H:%M' )
    omindex_log_fn=${log_dir}omindex.$now.log
    msg I "$my_nam: omindex log: $omindex_log_fn"
    omindex_log_created=$true
    printf -v cmd_text '%s ' time "${cmd[@]}"    # Space-joined, independent of IFS
    msg I "$my_nam: omindex command: ${cmd_text}1>$omindex_log_fn 2>&1"
    # The redirection target is quoted; unquoted, a log path containing
    # whitespace makes bash fail with "ambiguous redirect".  The redirection
    # applies to omindex only so the shell's own stdout and stderr need no
    # saving or restoring
    time "${cmd[@]}" 1>"$omindex_log_fn" 2>&1

    # Analyse the omindex log
    # ~~~~~~~~~~~~~~~~~~~~~~~
    # The log directory is only passed on when logging (no tty)
    analyse_args=( -c "$cfg_dir" -i "$omindex_log_fn" )
    if [[ ! $have_tty ]]; then
        analyse_args+=( -l "$log_dir" )
    fi
    if buf=$( ./analyse_omindex_log.sh "${analyse_args[@]}" 2>&1 ); then
        msg I "$buf"
    else
        msg E "$buf"
        finalise 1
    fi

}  # end of function index

#--------------------------
# Name: randomise_filter_choice
# Purpose: when multiple filters were given in the configuration file
#    for the same "MIME type", chooses one at random.
# Global variables read and set:
#    filter_option_argument: array of "<MIME type>:<command>" strings.
#       Rebuilt with one member per unique MIME type, in order of first
#       appearance
#    using_unoconv_wrapper: set to $true when a chosen filter mentions
#       unoconv_wrapper.sh
# Notes:
#    * Each filter's command (its first word) is checked with ck_cmd; any
#      output from ck_cmd is logged as an error
#--------------------------
function randomise_filter_choice {

    local candidates ck_cmd_out cmd commands i j mime_type mime_types
    local pick seen unique_mime_type unique_mime_types

    # Split the filters and get list of unique MIME types
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    mime_types=()
    commands=()
    unique_mime_types=()
    seen=':'
    for (( i=0; i<${#filter_option_argument[*]}; i++ ))
    do
        mime_type=${filter_option_argument[i]%%:*}
        mime_types[i]=$mime_type
        commands[i]=${filter_option_argument[i]#*:}

        # Literal (quoted) pattern match.  A regular expression match would
        # misread characters such as + and . which are common in MIME types
        if [[ $seen != *":$mime_type:"* ]]; then
            seen+="$mime_type:"
            unique_mime_types+=( "$mime_type" )
        fi

        cmd=${commands[i]%% *}
        ck_cmd_out=$( ck_cmd "$cmd" 2>&1 )
        [[ $ck_cmd_out != '' ]] && msg E "${filter_option_argument[i]}: $ck_cmd_out"
    done

    # Rebuild filter_option_argument with single member for each unique MIME type
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    unset filter_option_argument
    for (( i=0; i<${#unique_mime_types[*]}; i++ ))
    do
        unique_mime_type=${unique_mime_types[i]}

        # Indexes of every filter given for this MIME type (always at least one)
        candidates=()
        for (( j=0; j<${#mime_types[*]}; j++ ))
        do
            if [[ ${mime_types[j]} = "$unique_mime_type" ]]; then
                candidates+=( "$j" )
            fi
        done

        pick=${candidates[$(( RANDOM % ${#candidates[*]} ))]}
        filter_option_argument[i]="$unique_mime_type:${commands[pick]}"
        [[ ${filter_option_argument[i]} =~ unoconv_wrapper\.sh ]] && using_unoconv_wrapper=$true
    done

}  # end of function randomise_filter_choice

#--------------------------
# Name: main
# Purpose: where it all happens
# Notes:
#    * This is not a function; these are the script's top-level statements
#      and execution begins here, after all the function definitions
#    * "${@:-}" expands to a single empty argument when the script is run
#      without arguments, so initialise reports the missing configuration
#      directory itself
#    * finalise never returns; it exits the script with the given code
#--------------------------
initialise "${@:-}"
index
finalise 0

