#!/bin/sh
#
# analyses.sh
#
# Creates the directory structure for multiple BCC Pipeline analyses.
#
# usage:
#
#   analyses.sh [-j value] queueType analysesTable templateDirectory
#
#   queueType            type of the submission queue (Condor or qsub)
#   analysesTable        ASCII text table of analysis parameters
#   templateDirectory    directory of template analysis scripts
#
# The default queue type is Condor.
# An analysesTable named analyses.txt and a templateDirectory named
# template are assumed by default.
#
# The option -j NUMBER splits the qsub job submission into several
# submission files, each containing at most NUMBER jobs.
#
# The analysis table should consist of whitespace separated ASCII text
# with the following twelve columns.  Text following a '#' is treated
# as a comment and empty lines are skipped.
#
#    1. analysisName
#    2. detectorNetwork
#    3. channelNames
#    4. frameTypes
#    5. analysisMode
#    6. timeShifts
#    7. injectionName
#    8. injectionFactor
#    9. prefix
#   10. executable
#   11. segmentsFile
#   12. frameCacheFile
#
# The segments file and frame cache file should be specified either as
# absolute paths, or as paths relative to the current working
# directory.  The executable path is substituted verbatim into the
# template scripts, so an absolute path is the safest choice.
#
# The template directory should contain the following files.
#
#   parameters.txt       (all queue types)
#   analysis.sub         (Condor)
#   submission.batch     (qsub)
#   command.batch        (qsub)
#
# A directory will be created for each analysis using the following
# naming convention.
#
#   <prefix>/<analysisName>/<detectorNetwork>/<injectionName>/<timeShifts>/<injectionFactor>
#
# This directory will then contain the files of the specified template
# directory, after filling in the specified parameters from the
# analysis table, as well as a symbolic link named framecache.txt to
# the specified frame cache file.  For Condor it also contains a dag
# file created from the specified segment list; for qsub it contains
# one batch script per segment and the submission files that launch
# them.

# Shourov K. Chatterji
# shourov@ligo.caltech.edu

# $Id: analyses.sh,v 1.3 2008/09/09 16:57:54 ecm Exp $

# Eric Chassande-Mottin, add qsub support


# parse the options first

# limitJobsPerSubmission stays empty unless -j is given; jobNumber then holds
# the maximum number of jobs written to a single submission file
limitJobsPerSubmission=
jobNumber=0
while getopts 'j:' OPTION
do
  case "${OPTION}" in
    j)
      # the value is later compared with "[ ... -eq ... ]", so anything
      # other than a plain decimal number must be rejected here
      case "${OPTARG}" in
        '' | *[!0-9]*)
          printf "%s: option -j requires a positive integer, got '%s'\n" \
            "$(basename "$0")" "${OPTARG}" >&2
          exit 2
          ;;
      esac
      # a limit of zero jobs per submission file cannot be honoured
      if [ "${OPTARG}" -eq 0 ]; then
        printf "%s: option -j requires a positive integer, got '%s'\n" \
          "$(basename "$0")" "${OPTARG}" >&2
        exit 2
      fi
      limitJobsPerSubmission=1
      jobNumber="${OPTARG}"
      ;;
    \?)
      # getopts reports unknown options and missing arguments as a literal '?'
      printf "Usage: %s: [-j value] args\n" "$(basename "$0")" >&2
      exit 2
      ;;
  esac
done

# drop the parsed options so that $1 is the first positional argument
shift $((OPTIND - 1))

# parse command line arguments
# each positional argument is optional; "${N-default}" substitutes the
# default only when the argument is absent, so an explicitly empty
# argument is kept as given
queueType=${1-CONDOR}
analysesTable=${2-analyses.txt}
templateDirectory=${3-template}

# normalize the queue type to upper case so that "condor", "Condor" and
# "CONDOR" are equivalent; the tr sets are quoted because an unquoted
# [a-z] is a glob that expands to any matching single-letter file name in
# the current directory
queueType=$(printf '%s\n' "${queueType}" | tr '[a-z]' '[A-Z]')

# Print the path given as $1 unchanged when it is absolute or empty,
# otherwise prefixed with the current working directory.
absolutePath() {
  case "$1" in
    '' | /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "${PWD}/$1" ;;
  esac
}

# Print one "start stop" pair of integers for each segment of file $1.
#
# The sed expressions strip comments, discard everything in front of the
# first run of nine digits (for example a leading segment index column) and
# keep the first two numbers of what remains.  The first awk keeps only lines
# holding exactly two values; the second one treats a second value smaller
# than the first as a duration and converts it into a stop time.
segmentList() {
  sed -e 's|#.*$||' \
      -e 's|\([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]\)|#\1|' \
      -e 's|^.*#||' -e 's|^\([0-9\.]*\)[^0-9\.]*\([0-9\.]*\).*$|\1 \2|' \
      <"$1" |
    awk ' NF == 2 { printf "%s %s\n", int($1), int($2) } ' |
    awk ' { printf "%s %s\n", int($1), $2 < $1 ? int($1 + $2) : int($2) } '
}

# the template directory does not depend on the table contents, so it is
# converted to an absolute path once, before the loop
templateDirectory=$(absolutePath "${templateDirectory}")

# initialize line number counter
lineNumber=0

# begin loop over analyses table
#
# read is deliberately used without -r so that a trailing backslash still
# continues a table line, as it always has.  The "|| [ -n ... ]" part makes
# sure a last line without a terminating newline is processed too.
# shellcheck disable=SC2162
while read line || [ -n "${line}" ]; do

  # increment line number
  lineNumber=$((lineNumber + 1))

  # remove comments
  line=${line%%#*}

  # skip empty lines
  if [ -z "${line}" ]; then
    continue
  fi

  # parse line: split it on whitespace into the twelve table columns, any
  # further fields are ignored
  read -r analysisName detectorNetwork channelNames frameTypes analysisMode \
    timeShifts injectionName injectionFactor prefix executable \
    segmentsFile frameCacheFile ignoredFields <<EOF
${line}
EOF

  # test for missing parameters: the last column is empty whenever fewer
  # than twelve fields are present
  if [ -z "${frameCacheFile}" ]; then
    echo "ERROR: Syntax error on line ${lineNumber} of ${analysesTable}" 1>&2
    exit 1
  fi

  # refuse unknown queue types before anything is created on disk
  case "${queueType}" in
    CONDOR | QSUB) ;;
    *)
      echo "ERROR: Queue type ${queueType} is not supported" 1>&2
      exit 1
      ;;
  esac

  # if prefix is relative, convert to absolute
  prefix=$(absolutePath "${prefix}")

  # name of analysis directory
  analysisDirectory=${prefix}/${analysisName}/${detectorNetwork}/${injectionName}/${timeShifts}/${injectionFactor}

  # report status
  echo "creating ${analysisDirectory}..."

  # never overwrite the results of an earlier run
  if [ -d "${analysisDirectory}" ]; then
    echo "ERROR: ${analysisDirectory} already exists" 1>&2
    exit 1
  fi

  # create analysis directory; nothing below can work without it
  mkdir -p "${analysisDirectory}" || exit 1

  # if the frame cache path is relative, convert to absolute so that the
  # symbolic link created below does not dangle
  frameCacheFile=$(absolutePath "${frameCacheFile}")

  # create symbolic link to frame cache file
  ln -s "${frameCacheFile}" "${analysisDirectory}/framecache.txt"

  # generate submission files
  case "${queueType}" in
    CONDOR)

      # generate analysis dag file: one JOB/VARS entry per segment
      segmentList "${segmentsFile}" |
        awk ' { first = int($1); last = int($2);
                printf "JOB %s-%s analysis.sub\nVARS %s-%s start=\"%s\" stop=\"%s\"\n\n",
                       first, last, first, last, first, last } ' \
        >"${analysisDirectory}/analysis.dag"

      # apply analysis parameters to template submit script
      sed -e "s|<analysisDirectory>|${analysisDirectory}|" \
          -e "s|<executable>|${executable}|" \
          "${templateDirectory}/analysis.sub" \
          >"${analysisDirectory}/analysis.sub"
      ;;

    QSUB)

      # create directory holding one batch script per segment
      mkdir -p "${analysisDirectory}/batch"

      # generate label and file name of the first submission file
      analysisNumber=0
      submissionLabel=$(printf %02d "${analysisNumber}")
      submissionFile="analysis${submissionLabel}.batch"

      # initialize submission file and set the executable mode
      touch "${analysisDirectory}/${submissionFile}"
      chmod ug+x "${analysisDirectory}/${submissionFile}"

      # initialize job counter (jobs written to the current submission file)
      jobCount=0

      # define job tag
      jobTag=${analysisName}-${detectorNetwork}-${timeShifts}-${injectionFactor}

      # initialize status table: a header line, then one label per
      # submission file
      echo "# ${jobTag}" >"${analysisDirectory}/status"
      echo "${submissionLabel}" >>"${analysisDirectory}/status"

      # loop over the segment list; the loop body runs in a pipeline, which
      # is fine because none of the variables it updates is needed afterwards
      segmentList "${segmentsFile}" | while read -r start stop; do

        # with -j, start a new submission file once the current one is full
        if [ -n "${limitJobsPerSubmission}" ] &&
           [ "${jobCount}" -eq "${jobNumber}" ]; then

          # increment label and produce submission file name
          analysisNumber=$((analysisNumber + 1))
          submissionLabel=$(printf %02d "${analysisNumber}")
          submissionFile="analysis${submissionLabel}.batch"

          # initialize submission file and set the executable mode
          touch "${analysisDirectory}/${submissionFile}"
          chmod ug+x "${analysisDirectory}/${submissionFile}"

          # reset job counter
          jobCount=0

          # append to status table
          echo "${submissionLabel}" >>"${analysisDirectory}/status"
        fi

        # construct batch file name
        batchFile=${start}-${stop}.batch

        # append command line to submission file
        sed -e "s|<jobTag>|${jobTag}-${start}-${stop}-${submissionLabel}|" \
            -e "s|<batchFile>|batch/${batchFile}|" \
            "${templateDirectory}/submission.batch" \
            >>"${analysisDirectory}/${submissionFile}"

        # apply analysis parameters to template batch script
        sed -e "s|<start>|${start}|" \
            -e "s|<stop>|${stop}|" \
            -e "s|<analysisDirectory>|${analysisDirectory}|" \
            -e "s|<executable>|${executable}|" \
            "${templateDirectory}/command.batch" \
            >"${analysisDirectory}/batch/${batchFile}"

        # set the executable mode
        chmod ug+x "${analysisDirectory}/batch/${batchFile}"

        # increment job counter
        if [ -n "${limitJobsPerSubmission}" ]; then
          jobCount=$((jobCount + 1))
        fi

      done
      ;;
  esac

  # construct channel names for parameters file: quote every
  # comma-separated entry and wrap the list in braces
  channelNames=$(printf '%s\n' "${channelNames}" | sed "s/\([^,$]*\)/\'\1\'/g")
  channelNames=$(printf '%s\n' "${channelNames}" | sed -e "s|''|', '|g")
  channelNames="{${channelNames}}"

  # construct frame types for parameters file, same format as channel names
  frameTypes=$(printf '%s\n' "${frameTypes}" | sed "s/\([^,$]*\)/\'\1\'/g")
  frameTypes=$(printf '%s\n' "${frameTypes}" | sed -e "s|''|', '|g")
  frameTypes="{${frameTypes}}"

  # construct time shifts for parameters file: put a blank in front of a
  # sign that directly follows a digit and wrap the list in brackets
  timeShifts=$(printf '%s\n' "${timeShifts}" | sed -e 's|\([0-9]\)\([+-]\)|\1 \2|g')
  timeShifts="[${timeShifts}]"

  # construct injection names for parameters file: one '<detector>:GW-H'
  # entry per letter-digit detector code of the network
  injectionNames=$(printf '%s\n' "${detectorNetwork}" | sed -e "s|\([A-Z][0-9]\)|'\1:GW-H'|g")
  injectionNames=$(printf '%s\n' "${injectionNames}" | sed -e "s|''|', '|g")
  injectionNames="{${injectionNames}}"

  # construct injection types for parameters file: the injection name
  # repeated once per detector
  injectionTypes=$(printf '%s\n' "${detectorNetwork}" | sed -e "s|\([A-Z][0-9]\)|'${injectionName}'|g")
  injectionTypes=$(printf '%s\n' "${injectionTypes}" | sed -e "s|''|', '|g")
  injectionTypes="{${injectionTypes}}"

  # construct injection factors for parameters file: the injection factor
  # repeated once per detector, blank separated and unquoted
  injectionFactors=$(printf '%s\n' "${detectorNetwork}" | sed -e "s|\([A-Z][0-9]\)|'${injectionFactor}'|g")
  injectionFactors=$(printf '%s\n' "${injectionFactors}" | sed -e "s|''| |g" -e "s|'||g")
  injectionFactors="[${injectionFactors}]"

  # construct analysis mode for parameters file
  analysisMode="'${analysisMode}'"

  # apply analysis parameters to template parameters file
  sed -e "s|<channelNames>|${channelNames}|" \
      -e "s|<frameTypes>|${frameTypes}|" \
      -e "s|<timeShifts>|${timeShifts}|" \
      -e "s|<injectionNames>|${injectionNames}|" \
      -e "s|<injectionTypes>|${injectionTypes}|" \
      -e "s|<injectionFactors>|${injectionFactors}|" \
      -e "s|<analysisMode>|${analysisMode}|" \
      "${templateDirectory}/parameters.txt" \
      >"${analysisDirectory}/parameters.txt"

# end loop over analyses table
done <"${analysesTable}"

# terminate the script and report success (exit status 0) to the caller
exit 0
