#!/usr/bin/env python
#
# Copyright (C) 2005 Nicolas Burrus
# This file is part of Tidydoc.
#
# Tidydoc is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Tidydoc is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Tidydoc; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from getopt import getopt
import os, re, sys
from os.path import basename
from glob import glob
from xml.sax import saxutils

import td_dsc
import td_config
from td_config import config

def print_help():
  """Write the command-line usage summary to standard error."""
  usage_lines = (
    "Usage: td-dsc-to-xml [options]",
    "",
    "  Options",
    "",
    "  -c, --config configfile  Config file to use.",
    "  -v, --version            Print version number.",
    "",
    "Nicolas Burrus <nes@lrde.epita.fr>",
    # Trailing empty entry so the joined text ends with a newline.
    "",
  )
  sys.stderr.write("\n".join(usage_lines))

def parse_command_line(argv):
  """Parse the command-line arguments of td-dsc-to-xml.

  argv is the full argument vector, program name included (as sys.argv).

  Returns a dictionary with a single key, 'config_file', holding the value
  given to -c/--config, or None when the option is absent.  When the option
  is repeated, the last occurrence wins.

  Prints the usage text and exits with status 1 on an unknown option, a
  missing option argument, or any positional argument.  Prints the version
  banner and exits with status 0 on -v/--version.
  """
  # Imported locally: the module header only imports `getopt` from getopt.
  from getopt import GetoptError

  try:
    # "config=" (with the trailing '=') declares that the long option takes
    # an argument, like its short form "c:".  Without it, "--config FILE"
    # left FILE as a positional argument and the call ended in the usage
    # message.
    options, args = getopt(argv[1:], "vc:", ["version", "config="])
  except GetoptError:
    print_help()
    sys.exit(1)

  # Positional arguments are not accepted.
  if args:
    print_help()
    sys.exit(1)

  cmd_options = {'config_file': None}

  for name, value in options:
    if name in ("-v", "--version"):
      # NOTE(review): ac_config was referenced here without ever being
      # imported, so -v raised NameError.  Presumably it is the generated
      # module that holds the package version -- confirm.
      import ac_config
      sys.stdout.write("Tidydoc " + ac_config.version + "\n")
      sys.stdout.write("Nicolas Burrus <nes@lrde.epita.fr>\n")
      sys.exit(0)
    if name in ("-c", "--config"):
      cmd_options['config_file'] = value

  return cmd_options

def _collect_directories(input_path, recursive):
  """Return the sorted list of directories to scan for .dsc files.

  input_path is always included.  When recursive is true, every directory
  below it is added too, except those containing a ".td_raw_dir" marker
  file, which are skipped together with their whole subtree.
  """
  directories = [input_path]
  if recursive:
    for root, dirs, files in os.walk(input_path):
      kept = []
      for name in dirs:
        if os.path.exists(os.path.join(root, name, ".td_raw_dir")):
          continue
        kept.append(name)
        directories.append(os.path.join(root, name))
      # Prune in place, once the loop is over, so os.walk does not descend
      # into raw directories.  Removing entries from `dirs` while iterating
      # over it made the loop skip the sibling that followed each removal.
      dirs[:] = kept
  directories.sort()
  return directories

def _write_document_xml(directory, doc):
  """Convert the .dsc file `doc` into an XML file written in `directory`.

  The output is named after the companion document file: the first path
  sharing the .dsc file's prefix that ends neither in ".dsc" nor in ".xml".
  When no such file exists, the 'Docfile' section of the description is
  used instead (KeyError if the description has none).
  """
  sections = td_dsc.parse(doc)

  # Authors are emitted as one <author> element each, so keep them as a
  # list; they still need escaping like every other value.
  authors = [saxutils.escape(author) for author in sections['Author']]

  # Every section is flattened to one escaped string; its entries are
  # separated by an escaped "<br>" followed by a newline.
  for sec in list(sections.keys()):
    escaped = [saxutils.escape(x) for x in sections[sec]]
    sections[sec] = "&lt;br&gt;\n".join(escaped)

  # Look for the document
  docfile = None
  for candidate in glob(re.sub(r'\.dsc$', '*', doc)):
    if not re.match(r'.*\.(dsc|xml)$', candidate):
      docfile = basename(candidate)
      break

  if docfile is None:
    docfile = sections['Docfile']
  else:
    # The raw name is used for the output path, the escaped one goes into
    # the <file> element.
    sections['Docfile'] = saxutils.escape(docfile)

  # `with` closes the file even when a template lookup or a write fails.
  with open(directory + '/' + docfile + '.xml', "w") as xml:
    xml.write("""<?xml version="1.0" encoding="ISO-8859-1"?>""")
    xml.write(
    """
      <document>
        <title>%(Title)s</title>
        <file>%(Docfile)s</file>
        <nbpages>%(Pages)s</nbpages>
        <type>%(Visibility)s</type>""" % sections)

    for author in authors:
      xml.write("""
        <author>%s</author>""" % author)

    xml.write(
    """
        <date>%(Date)s</date>
        <language>%(Language)s</language>
        <summary>%(Abstract)s</summary>
        <comment><content>%(Comment)s</content></comment>
        <url>%(Link)s</url>
      </document>
    """ % sections)

def run(recursive=True):
  """Generate an XML description next to every .dsc file under the input path.

  The root directory is read from config['input_path'].  With recursive set
  to False only that directory is scanned; by default its subdirectories are
  scanned too (see _collect_directories for the exclusion rule).
  """
  for directory in _collect_directories(config['input_path'], recursive):
    # Insert normal documents
    for doc in glob(directory + '/*.dsc'):
      _write_document_xml(directory, doc)

def __main__():
  """Script entry point: parse the arguments, load the configuration, convert.

  The configuration file named on the command line (or None when the option
  is absent) is passed to td_config.read_config.  If that call fails, the
  exception's class name and message are written to standard error and the
  process exits with status 1.  Otherwise run() performs the conversion.
  """
  cmd_options = parse_command_line(sys.argv)
  try:
    td_config.read_config(cmd_options['config_file'])
  except Exception:
    # `except Exception` rather than a bare `except`, so that SystemExit
    # and KeyboardInterrupt are not reported as configuration errors.
    exc_type, exc_value = sys.exc_info()[:2]
    # Include the message: the class alone does not say what went wrong.
    sys.stderr.write("%s: %s\n" % (exc_type.__name__, exc_value))
    sys.exit(1)
  run()

# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
  __main__()
