#! /bin/bash

# Copyright (C) 2011 Charles Atkinson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

# Purpose: runs the document collation scripts

# Usage: 
#    * Single mandatory argument, being the config file
#    * Automatically outputs to log if there is no tty to log to
#    * The log is created in the configured log directory
#    * To force output to log: export SET_HAVE_TTY_FALSE=true

# Function call tree
#    +
#    |
#    +-- initialise
#    |   |
#    |   +-- parse_cfg
#    |
#    +-- run_scripts
#    |
#    +-- finalise
#
# Utility functions called from various places:
#    ck_file msg

# Function definitions in alphabetical order.  Execution begins after the last function definition.

#--------------------------
# Name: finalise
# Purpose: final logging, optional mailing of the log file, and exit
# Arguments:
#   $1 - return code to exit with.  129, 130, 131 and 143 are additionally
#        logged as finalising on SIGHUP, SIGINT, SIGQUIT and SIGTERM
# Global variables read:
#   cumulative_rc (may be unset), false, global_error_flag,
#   global_warning_flag, have_tty, my_log_fn, my_nam, send_log_to,
#   send_log_when, send_log_with, true
# Return: does not return.  Exits with $1, or with 1 when $1 is 0 and
#   cumulative_rc is greater than 0
#--------------------------
function finalise {

    local buf rc send_log subject summary

    # Final logging
    # ~~~~~~~~~~~~~
    rc=$1
    case $rc in
        129 )
            msg I "$my_nam: finalising on SIGHUP"
            ;;
        130 )
            msg I "$my_nam: finalising on SIGINT"
            ;;
        131 )
            msg I "$my_nam: finalising on SIGQUIT"
            ;;
        143 )
            msg I "$my_nam: finalising on SIGTERM"
            ;;
    esac

    # The two flags are tested independently so that both a WARNING and an
    # ERROR are reported when both are set
    summary=
    if [[ $global_warning_flag ]]; then
        summary="$summary"$'\n'"  There was at least one WARNING in the logs"
    fi
    if [[ $global_error_flag ]]; then
        summary="$summary"$'\n'"  There was at least one ERROR in the logs"
    fi
    if [[ $summary = '' ]]; then
        msg I "$my_nam: no warnings or errors detected in the logs (omindex skipped files are so normal, any such warning is ignored)"
    else
        # Logged at class I because the behaviour of msg for classes W and E
        # is defined outside this file
        msg I "$my_nam: log summary:$summary"
    fi

    if [[ ${cumulative_rc:-} != '' ]]; then
        msg I "$my_nam: cumulative return code from called scripts: $cumulative_rc"
        # A failure in any called script must not be hidden by a zero exit
        [[ $cumulative_rc -gt 0 && $rc -eq 0 ]] && rc=1
    fi
    msg I "$my_nam: exiting with return code $rc"

    # Mail log if required
    # ~~~~~~~~~~~~~~~~~~~~
    # Only possible when output went to a log file, that is when there is no tty
    if [[ ! $have_tty && $send_log_to != '' ]]; then
        send_log=$false
        case $send_log_when in
            'always' )
                send_log=$true
                ;;
            'warning' )
                [[ $global_warning_flag ]] && send_log=$true
                ;;
            'error' )
                [[ $global_error_flag ]] && send_log=$true
                ;;
        esac
        if [[ $send_log ]]; then
            subject='docoll run_scripts log'
            case $send_log_with in
                'postfix' | 'sendmail' )
                    # Build a minimal RFC 822 message: headers, blank line, log
                    buf="From: $( id --name --user ) at $( hostname --short ) <$send_log_to>"
                    buf="$buf"$'\n'"To: $send_log_to"
                    buf="$buf"$'\n'"Subject: $subject"
                    buf="$buf"$'\n\n'"$( < "$my_log_fn" )"
                    printf '%s\n' "$buf" | sendmail -t -i -f "$send_log_to"
                    ;;
                'mail' )
                    mail -s "$subject" "$send_log_to" < "$my_log_fn"
                    ;;
            esac
        fi
    fi

    exit "$rc"

}  # end of function finalise

#--------------------------
# Name: initialise
# Purpose: sets up environment, parses command line, sets up logging and parses the config file
# Arguments:
#   $1 - pathname of the configuration file (mandatory, must not be empty)
# Global variables set:
#   emsg, ionice_class, ionice_data, log_retention, my_log_fn (only when
#   there is no tty), nice, send_log_to, send_log_when, send_log_with,
#   tmp_retention, plus whatever parse_cfg and the bash library set
# Return: returns normally when initialisation succeeded; otherwise writes
#   the reason to stderr and exits with return code 1
#--------------------------
function initialise {

    local bash_lib buf email_re my_cfg_fn nice_re now valid

    # Configure shell
    # ~~~~~~~~~~~~~~~
    # Normally this is done by the bash library but must be done early by this
    # script because it has to read its config file to determine which bash
    # library to use
    export PATH=/usr/bin:/bin
    set -o nounset
    shopt -s extglob
    umask 0022

    # Parse command line
    # ~~~~~~~~~~~~~~~~~~
    if [[ ${1:-} = '' ]]; then
        echo "${0##*/}: mandatory config file argument missing or empty" >&2
        exit 1
    fi
    my_cfg_fn=$1

    # Set defaults that may be overridden by the configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ionice_class=2      # Best Effort I/O class
    ionice_data=7       # Lowest I/O priority
    nice=19             # Lowest CPU priority
    log_retention=28    # Days to keep log files
    send_log_to=
    send_log_when=never
    send_log_with=postfix
    tmp_retention=7     # Days to keep tmp files

    # Parse configuration file
    # ~~~~~~~~~~~~~~~~~~~~~~~~
    emsg=''
    parse_cfg "$my_cfg_fn"

    # Change to program directory
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # ${prog_dir:-} is used so a configuration file with no 'program directory'
    # keyword produces the message below instead of a nounset abort
    if [[ ! -d ${prog_dir:-} || ! -r ${prog_dir:-} || ! -x ${prog_dir:-} ]]; then
        echo "Program directory '${prog_dir:-}' does not exist, does not have necessary permissions or is unreachable" >&2
        exit 1
    fi
    cd "$prog_dir" || exit 1

    # Source the bash library
    # ~~~~~~~~~~~~~~~~~~~~~~~
    bash_lib=./bash_lib.sh
    source "$bash_lib"
    if [[ $? -ne 0 ]]; then
        echo "${0##*/}: unable to read the bash library, '$bash_lib'. Exiting" >&2
        exit 1
    fi

    # Error traps
    # ~~~~~~~~~~~
    # Delayed until now when bash library function ck_file is available
    if [[ ${backup_dir:-} = '' ]]; then
        # backup_dir is dereferenced when logging the configuration and when
        # running the backup script so it must be present
        emsg="$emsg"$'\n'"  keyword 'backup directory' not found or has no data"
    fi
    case $ionice_class in
        1 | 2 | 3 )
            ;;
        * )
            emsg="$emsg"$'\n'"  keyword 'ionice class': invalid value (must be 1, 2 or 3)"
            ;;
    esac
    if [[ $ionice_data =~ ^[0-9]+$ ]]; then
        # 10# forces decimal so leading zeroes are not taken as octal
        (( 10#$ionice_data > 7 )) \
            && emsg="$emsg"$'\n'"  keyword 'ionice data': invalid value (must be 0 to 7): $ionice_data"
        ionice_data=$(( 10#$ionice_data )) # Ensure no leading zeroes
    else
        emsg="$emsg"$'\n'"  keyword 'ionice data': invalid value (must be 0 to 7): $ionice_data"
    fi
    if [[ ${cfg_dir:-} != '' ]]; then
        buf=$( ck_file "$cfg_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  configuration directory: $buf"
    else
        emsg="$emsg"$'\n'"  keyword 'configuration directory' not found or has no data"
    fi
    nice_re='^(-?)([0-9]+)$'
    if [[ $nice =~ $nice_re ]]; then
        # The sign is kept outside the 10# constant; the digits are forced to
        # decimal so values such as 08 or -09 are not rejected as bad octal
        buf=$(( ${BASH_REMATCH[1]}10#${BASH_REMATCH[2]} ))
        (( buf < -20 || buf > 19 )) \
            && emsg="$emsg"$'\n'"  keyword 'nice': invalid value (must be -20 to 19): $nice"
        nice=$buf # Ensure no leading zeroes
    else
        emsg="$emsg"$'\n'"  keyword 'nice': invalid value (must be -20 to 19): $nice"
    fi
    if [[ ${log_dir:-} != '' ]]; then
        buf=$( ck_file "$log_dir" d:rwx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  log directory: $buf"
    else
        emsg="$emsg"$'\n'"  keyword 'log directory' not found or has no data"
    fi
    # An empty value is rejected too; it would otherwise reach find as '-mtime +'
    if [[ ! ${log_retention:-} =~ ^[0-9]+$ ]]; then
        emsg="$emsg"$'\n'"  'log retention' is not an unsigned integer"
    fi
    if [[ ${prog_dir:-} != '' ]]; then
        buf=$( ck_file "$prog_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  program directory: $buf"
    else
        emsg="$emsg"$'\n'"  keyword 'program directory' not found or has no data"
    fi
    case $send_log_when in
        'always' | 'warning' | 'error' )
            valid=$false
            case $send_log_to in
                *@* )
                    # Top level domains may be longer than four characters
                    email_re='^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
                    [[ $send_log_to =~ $email_re ]] && valid=$true
                    ;;
                * )
                    # No domain part: accept the name of a local user
                    grep -q "^$send_log_to:" /etc/passwd && valid=$true
                    ;;
            esac
            [[ ! $valid ]] && emsg="$emsg"$'\n'"  invalid 'send log to' email address '$send_log_to'"
            ;;
        'never' )
            ;;
        * )
            emsg="$emsg"$'\n'"  invalid 'send log when' value '$send_log_when'.  Must be always, warning, error or never"
            ;;
    esac
    case $send_log_with in
        'mail' | 'postfix' | 'sendmail' )
            ;;
        * )
            emsg="$emsg"$'\n'"  invalid 'send log with' value '$send_log_with'.  Must be mail, postfix or sendmail"
            ;;
    esac
    if [[ ${tmp_dir:-} != '' ]]; then
        buf=$( ck_file "$tmp_dir" d:rx: 2>&1 )
        [[ $buf != '' ]] && emsg="$emsg"$'\n'"  temporary directory: $buf"
    else
        emsg="$emsg"$'\n'"  keyword 'temporary directory' not found or has no data"
    fi
    if [[ ! ${tmp_retention:-} =~ ^[0-9]+$ ]]; then
        emsg="$emsg"$'\n'"  'tmp file retention' is not an unsigned integer"
    fi

    if [[ $emsg != '' ]]; then
        echo "${0##*/}: Problems with configuration file $my_cfg_fn:$emsg" >&2
        exit 1
    fi

    # Override tty status
    # ~~~~~~~~~~~~~~~~~~~
    [[ ${SET_HAVE_TTY_FALSE:-$false} ]] && have_tty=$false

    # Set up output redirection and logging
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ck_file "$log_dir" d:w: || finalise 1
    now=$( date +'%y-%m-%d@%H:%M' )
    if [[ ! $have_tty ]]; then
        # my_log_fn is global because finalise reads it when mailing the log
        my_log_fn=$log_dir$my_nam.$now.log
        exec 1>>"$my_log_fn"
        exec 2>>"$my_log_fn"
    else
        exec 1>/dev/tty
        exec 2>/dev/tty
    fi
    msg I "$my_nam: started by: $0 $*"

    # Log configuration and locale values
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    msg I "$my_nam: configuration:
  Backup directory: $backup_dir $( my_readlink "$backup_dir" )
  Configuration directory: $cfg_dir $( my_readlink "$cfg_dir" )
  I/O nice class: $ionice_class
  I/O nice data: $ionice_data
  Log directory: $log_dir $( my_readlink "$log_dir" )
  Log retention: $log_retention
  Nice: $nice
  Program directory: $prog_dir $( my_readlink "$prog_dir" )
  Send log when: $send_log_when
  Send log_to: $send_log_to
  Tmp retention: $tmp_retention"
    msg I "$my_nam: locale:"$'\n'"$( locale | sed 's/^/  /' )"

}  # end of function initialise

#--------------------------
# Name: parse_cfg
# Purpose: parses the configuration file
# Arguments:
#   $1 - pathname of file to parse
# File format: one "keyword = data" pair per line.  Text from the first #
#   to the end of the line is a comment.  Empty lines are ignored.
# Global variables set (only for keywords found in the file):
#   backup_dir, cfg_dir, ionice_class, ionice_data, log_dir, log_retention,
#   nice, prog_dir, send_log_to, send_log_when, send_log_with, tmp_dir,
#   tmp_retention
# Global variables appended to:
#   emsg - one line for each unrecognised keyword or line without an =
# Return: returns normally after parsing; exits with return code 1 when the
#   file is not a readable regular file
#--------------------------
function parse_cfg {

    local buf cfg_fn data keyword

    # Does the file exist?
    # ~~~~~~~~~~~~~~~~~~~~
    cfg_fn=$1
    if [[ ! -f $cfg_fn || ! -r $cfg_fn ]]; then
        echo "Configuration file '$cfg_fn' does not exist, cannot be read or is unreachable" >&2
        exit 1
    fi

    # Parse the file
    # ~~~~~~~~~~~~~~
    # The file is read on file descriptor 3, which is attached to the loop only.
    # "|| [[ $buf != '' ]]" processes a last line that has no trailing newline.
    # Whitespace is stripped with plain parameter expansion so this function
    # does not depend on the extglob shell option.
    while IFS= read -r -u 3 buf || [[ $buf != '' ]]
    do
        buf=${buf%%#*}                              # strip any comment
        buf=${buf#"${buf%%[![:space:]]*}"}          # strip any leading whitespace
        buf=${buf%"${buf##*[![:space:]]}"}          # strip any trailing whitespace
        if [[ $buf = '' ]]; then
            continue                                # empty line
        fi
        if [[ $buf != *=* ]]; then
            emsg="$emsg"$'\n'"  No '=' found in line: '$buf'"
            continue
        fi
        keyword=${buf%%=*}
        keyword=${keyword%"${keyword##*[![:space:]]}"}  # strip any trailing whitespace
        data=${buf#*=}
        data=${data#"${data%%[![:space:]]*}"}       # strip any leading whitespace

        # Directories: ensure a single trailing slash.  Empty data is left
        # empty so that it is reported as missing rather than becoming "/"
        case $keyword in
            *' directory' )
                if [[ $data != '' ]]; then
                    while [[ $data = */ ]]
                    do
                        data=${data%/}
                    done
                    data=$data/
                fi
                ;;
        esac

        case $keyword in
            'backup directory' )
                backup_dir=$data
                ;;
            'configuration directory' )
                cfg_dir=$data
                ;;
            'ionice class' )
                ionice_class=$data
                ;;
            'ionice data' )
                ionice_data=$data
                ;;
            'log directory' )
                log_dir=$data
                ;;
            'log retention' )
                log_retention=$data
                ;;
            'nice' )
                nice=$data
                ;;
            'program directory' )
                prog_dir=$data
                ;;
            'send log to' )
                send_log_to=$data
                ;;
            'send log when' )
                send_log_when=$data
                ;;
            'send log with' )
                send_log_with=$data
                ;;
            'tmp directory' )
                tmp_dir=$data
                ;;
            'tmp retention' )
                tmp_retention=$data
                ;;
            * )
                emsg="$emsg"$'\n'"  Unrecognised keyword: '$keyword'"
                ;;
        esac

    done 3< "$cfg_fn"

}  # end of function parse_cfg

#--------------------------
# Name: run_scripts
# Purpose: runs the document collation scripts
# Runs, in order, from the current directory: collate.rb, backup_db.sh,
#   omindex.sh and clean.rb, each under nice and ionice, each with its own
#   timestamped log file in the log directory
# Global variables read:
#   backup_dir, cfg_dir, have_tty, ionice_class, ionice_data, log_dir,
#   my_nam, nice, tmp_dir
# Global variables set:
#   cumulative_rc - sum of the return codes of the scripts run
#   global_error_flag, global_warning_flag - via run_scripts_excerpt_log
# Return: calls finalise 1 when a Ruby script's log file cannot be read
#--------------------------
function run_scripts {

    local backup_db_sh_rc clean_rb_rc collate_rb_rc log_fn now omindex_sh_rc

    cumulative_rc=0

    # Collate
    # ~~~~~~~
    # The log is typically large when collate.rb is run with loglevel INFO or DEBUG
    # so this script tries to excerpt only the useful parts
    msg I "$my_nam: Starting collation"
    now=$( date +'%y-%m-%d@%H:%M' )
    log_fn=${log_dir}collate.rb.$now.log
    time nice -n "$nice" ionice -c"$ionice_class" -n"$ionice_data" -t \
        ./collate.rb --config "${cfg_dir}collate.cfg" --log "$log_fn"
    collate_rb_rc=$?
    (( cumulative_rc += collate_rb_rc ))
    run_scripts_excerpt_log "$collate_rb_rc" "$log_fn"
    if [[ $collate_rb_rc -ne 0 ]]; then
        msg W "$my_nam: return code $collate_rb_rc from collate.rb"
    fi

    # Backup the database
    # ~~~~~~~~~~~~~~~~~~~
    msg I "$my_nam: Running the backup script"
    now=$( date +'%y-%m-%d@%H:%M' )
    log_fn=${log_dir}backup_db.sh.$now.log
    nice -n "$nice" ionice -c"$ionice_class" -n"$ionice_data" -t \
        ./backup_db.sh "$cfg_dir" "$backup_dir" "$log_fn"
    backup_db_sh_rc=$?
    (( cumulative_rc += backup_db_sh_rc ))
    # When logging to file, include the script's log in this script's log
    [[ ! $have_tty ]] && cat "$log_fn"
    if [[ $backup_db_sh_rc -ne 0 ]]; then
        msg W "$my_nam: non-zero return code $backup_db_sh_rc from backup_db.sh"
    fi

    # Index
    # ~~~~~
    msg I "$my_nam: Starting indexing"
    now=$( date +'%y-%m-%d@%H:%M' )
    log_fn=${log_dir}omindex.sh.$now.log
    time nice -n "$nice" ionice -c"$ionice_class" -n"$ionice_data" -t \
        ./omindex.sh -c "$cfg_dir" -l "$log_fn" -t "$tmp_dir"
    omindex_sh_rc=$?
    (( cumulative_rc += omindex_sh_rc ))
    [[ ! $have_tty ]] && cat "$log_fn"
    if [[ $omindex_sh_rc -ne 0 ]]; then
        msg W "$my_nam: non-zero return code $omindex_sh_rc from omindex.sh"
    fi

    # Clean
    # ~~~~~
    # The log is typically large when clean.rb is run with loglevel INFO or DEBUG
    # so this script tries to excerpt only the useful parts
    msg I "$my_nam: Starting cleaning"
    now=$( date +'%y-%m-%d@%H:%M' )
    log_fn=${log_dir}clean.rb.$now.log
    time nice -n "$nice" ionice -c"$ionice_class" -n"$ionice_data" -t \
        ./clean.rb --config "${cfg_dir}collate.cfg" --log "$log_fn"
    clean_rb_rc=$?
    (( cumulative_rc += clean_rb_rc ))
    run_scripts_excerpt_log "$clean_rb_rc" "$log_fn"
    if [[ $clean_rb_rc -ne 0 ]]; then
        msg W "$my_nam: non-zero return code $clean_rb_rc from clean.rb"
    fi

}  # end of function run_scripts

#--------------------------
# Name: run_scripts_excerpt_log
# Purpose: private helper of run_scripts; writes the useful parts of a Ruby
#   script's log to stdout and sets the global warning and error flags
# Arguments:
#   $1 - return code of the script that wrote the log
#   $2 - pathname of the log file
# Global variables set:
#   global_error_flag, global_warning_flag - set to $true when the return
#   code is non-zero and the log has matching lines
# Return: calls finalise 1 when the log file cannot be read
#--------------------------
function run_scripts_excerpt_log {

    local log_fn rc

    rc=$1
    log_fn=$2
    ck_file "$log_fn" f:r: || finalise 1

    if [[ $rc -eq 0 ]]; then
        # Success: whole log when short, otherwise only its head and tail
        if [[ $( wc -l < "$log_fn" ) -gt 50 ]]; then
            head -20 "$log_fn"
            echo -e '...\n[snip]\n...'
            tail -15 "$log_fn"
        else
            cat "$log_fn"
        fi
        return
    fi

    # Failure: head of the log followed by the problem lines with context.
    # Both ' WARN:' and ' WARNING:' are matched, for the excerpt and for the
    # flag, so that every warning shown is also counted.
    msg I "$my_nam: $log_fn exerpts:"
    head -20 "$log_fn"
    echo -e '...\n[ERRORS and WARNINGS begin]\n...'
    grep -e ' WARN:' -e ' WARNING:' -e ' ERROR:' -e ' FATAL:' --context=3 "$log_fn"
    grep -q -e ' WARN:' -e ' WARNING:' "$log_fn" && global_warning_flag=$true
    grep -q -e ' ERROR:' -e ' FATAL:' "$log_fn" && global_error_flag=$true

}  # end of function run_scripts_excerpt_log

#--------------------------
# Name: main
# Purpose: where it all happens
# Arguments: the script's command line arguments
# Sequence: initialise, run the scripts, remove log and tmp files older than
#   their retention periods in days, then finalise with return code 0
#--------------------------
function main {

    initialise "${@:-}"
    run_scripts

    # Housekeeping
    # ~~~~~~~~~~~~
    find "$log_dir" -type f -name '*.log' -mtime "+$log_retention" -delete
    find "$tmp_dir" -type f -mtime "+$tmp_retention" -delete

    finalise 0

}  # end of function main

main "$@"

