"""
HTML Export v0.1
Author: Dinesh A. Joshi <dinesh.joshi@yahoo.com>

NOTE: Requires BeautifulSoup.py
"""

import os
import pickle
from Products.selfApp.Extensions.BeautifulSoup import *
from Products.selfApp.Extensions.courseStruct import *
import os, sys
import random
import shutil
import subprocess

class HTMLExport:
    """
    Builds a single HTML document for the course identified by `ssid`.

    The document is assembled from the pickled course structure and the
    content of every object referenced by it.  Image directories referenced
    by local <img> tags are copied into /tmp/<dirName>/resources and the
    image links are rewritten to point there.
    """

    def __init__( self, ssid, zobj, instanceName ):
        """
        ssid         -- id of the course object to export
        zobj         -- object exposing a `gnowql` attribute used for lookups
        instanceName -- name used to build the image base path in copyImages()
        """
        self.ssid = ssid
        self.gnowql = zobj.gnowql
        self.instanceName = instanceName
        self.soup = ""
        self.dirName = str( int( random.random() * 1000000000000 ) ) + "htmlexp"
#         self.dirName = str( ssid ) + "htmlexp"

        print( "instance: %s" % ( self.instanceName, ) )
        # delete the directory if it already exists
        shutil.rmtree( self._workDir(), True )

    def _workDir( self ):
        """Returns the working directory of this export: /tmp/<dirName>."""
        return '/tmp/' + self.dirName

    def _makeDirs( self, path ):
        """
        Creates `path` (and missing parents).  Best effort: a failure is
        reported on stdout and not raised, like the shell `mkdir` it replaces.
        """
        if os.path.isdir( path ):
            return
        try:
            os.makedirs( path )
        except OSError:
            print( "could not create directory: %s" % ( path, ) )

    def _isRemoteURL( self, url ):
        """
        True if `url` starts with http:// or https:// (case-insensitive).
        This is not a very good criteria to classify whether a URL lives on
        our server or on a remote one, but currently it is so.
        """
        prefix = url[:8].lower()
        return prefix[:7] == 'http://' or prefix == 'https://'

    def _lastPathParts( self, url ):
        """
        Returns the trailing '/'-separated components of `url`: the last
        three when there are at least three.  The slice start is computed as
        len - 3 on purpose (kept from the original code), so a
        two-component path yields only its last component.
        """
        tks = url.split( '/' )
        return tks[ len( tks ) - 3 : ]

    def makeCourseStructure( self, lstStruct ):
        """Placeholder: nesting of the course structure is not implemented."""
        # do nesting..
        pass

    def getCourseStructure( self ):
        """
        Loads and returns the pickled course structure stored in the file
        <uri of ssid>/<ssid>_CourseStructure.
        """
        ssid = self.ssid
        preuri = self.gnowql.geturifromssid( ssid, 'gbobjects' )
        finaluri = preuri + '/' + str( ssid ) + '_' + 'CourseStructure'

        print( finaluri )
        # NOTE(review): pickle.load executes arbitrary code if the file is
        # attacker-controlled; confirm this file is only written by the server.
        fp = open( finaluri, 'rb' )
        try:
            info = pickle.load( fp )
        finally:
            # close the file even when unpickling fails
            fp.close()

        return info

    def stripHTML( self, html ):
        """
        Returns the text between "<body>" and the following "</body>".

        A missing "<body>" means the text starts at the beginning of `html`;
        a missing "</body>" means it runs to the end.  None yields "".
        """
        if html is None:
            return ""

        openTag = "<body>"
        startIndex = html.find( openTag )
        if startIndex == -1:
            startIndex = 0
        else:
            startIndex = startIndex + len( openTag )

        # only a closing tag after the opening one is meaningful
        endIndex = html.find( "</body>", startIndex )
        if endIndex == -1:
            endIndex = len( html )

        return html[ startIndex : endIndex ]

    def rewriteImageLinks( self, html ):
        """
        Rewrites the src of every local <img> in self.soup to
        ./resources/<trailing path components> and returns the serialized
        soup.  `html` is not used; the method works on self.soup.
        """
        soup = self.soup

        for tag in soup.findAll( 'img' ):
            prevURL = tag.get( 'src' )

            # tags without a src and remote images are left untouched
            if not prevURL or self._isRemoteURL( prevURL ):
                print( "skipping..." )
                continue

            tag['src'] = "/".join( [ './resources' ] + self._lastPathParts( prevURL ) )

        return soup.__str__()
#         return soup.prettify()

    def copyImages( self ):
        """
        Copies the image directory of every distinct local <img> directory
        found in self.soup into /tmp/<dirName>/resources/<dir>.

        Returns 1 if at least one copy was attempted, 0 otherwise.

        NOTE(review): the original comment said this must be called after
        rewriteImageLinks(), but export() calls it before -- confirm which
        order the path arithmetic below expects.
        NOTE(review): the source path is derived from the img src and is not
        checked to stay below the base path -- confirm that content is trusted.
        """
        images = self.soup.findAll( 'img' )
        basePath = CLIENT_HOME + '/GB/' + self.instanceName + '/'
        print( "basepath: %s" % ( basePath, ) )

        imgDirs = []
        copyJobs = []

        for img in images:
            src = img.get( 'src' )

            if not src or self._isRemoteURL( src ):
                print( "skipping..." )
                continue

            dirPart = self._lastPathParts( src )[0]
            if not dirPart:
                # an empty component cannot be used as a split separator
                print( "skipping..." )
                continue
            if dirPart in imgDirs:
                continue

            imgDirs.append( dirPart )
            print( dirPart )
            tmp = src.split( dirPart )
            completePath = basePath + tmp[0][7:] + dirPart + '/images/'
            print( "completePath: %s" % ( completePath, ) )
            copyJobs.append( ( dirPart, completePath ) )
            print( tmp[0][2:] )

        flagResources = 0
        for dirPart, srcPath in copyJobs:
            destDir = self._workDir() + '/resources/' + dirPart
            self._makeDirs( destDir )
            print( "mkdir -p %s" % ( destDir, ) )
            print( "cp -a %s %s" % ( srcPath, destDir ) )
            flagResources = 1
            # argument list, no shell: paths taken from the content cannot
            # inject shell commands
            try:
                subprocess.call( [ 'cp', '-a', srcPath, destDir ] )
            except OSError:
                print( "could not run cp for: %s" % ( srcPath, ) )

        return flagResources

    def getHTMLContent( self ):
        """
        Returns the tree HTML of the course followed by the stripped content
        of every entry of the linear course structure.  Each entry is
        preceded by a named anchor and followed by a link to "#top".
        """
        courseObjInfo = self.gnowql.getinfoFromSSID( self.ssid, 'gbobjects' )
        courseStructure = self.getCourseStructure()

        print( "course info: %s" % ( courseObjInfo, ) )
        print( "course structure: %s" % ( courseStructure, ) )

        # find out the unique ssids from the course structure
        lstContents = []
        for pair in courseStructure:
            for p in pair:
                if p != '0' and p not in lstContents:
                    lstContents.append( p )

        print( "getting info of: %s" % ( lstContents, ) )

        # get all the contents
        contentInfo = self.gnowql.getAllSSIDs( lstContents, 'gbobjects' )

        lstTitles = {}
        for c in lstContents:
            lstTitles[ '%s' % ( c, ) ] = contentInfo[ int( c ) ]['title'][0]

        print( "lst: %s" % ( lstTitles, ) )

        o = courseStruct( courseStructure, lstTitles )
        parts = [ o.getTreeHTML() ]

        for value in o.getLinearStruct():
            entry = contentInfo[ int( value ) ]
            parts.append( "<a name=\"%s\"></a>" % ( entry['ssid'], ) )
            parts.append( self.stripHTML( entry['content'] ) )
            parts.append( "<br><br><a href=\"#top\">Top</a>" )

        return "".join( parts )

    def export( self ,ssid ):
        """
        Builds the export and returns the complete HTML: a metadata header
        (author, creation date/time, version of `ssid`) followed by the
        course HTML with rewritten image links.

        Side effects: creates /tmp/<dirName>/resources and copies the
        referenced image directories into it.

        NOTE(review): the original code had statements after the return that
        wrote index.html and zipped the directory; they were unreachable, so
        no course.zip has ever been produced here.  They were dropped --
        restore them before the return if the zip is actually wanted.
        """
        html = self.getHTMLContent()
        self.soup = BeautifulSoup( html )

        self._makeDirs( self._workDir() + '/resources' )

        self.copyImages()
        completeHTML = self.rewriteImageLinks( html )

        info = self.gnowql.getinfoFromSSID( ssid ,'gbobjects')
        username = self.gnowql.getUsernamefromUserId( str( info['uid'] ) )

        # timestamp is expected as "<date> <time>.<fraction>"; the time part
        # is left empty when there is no space in it
        stamp = str( info['gbtimestamp'] ).split( ' ' )
        creationdate = stamp[0]
        creationtime = ''
        if len( stamp ) > 1:
            creationtime = stamp[1].split( '.' )[0]

        version = 'V' + str( info['noofchangesaftercommit'] ) + '.' + str( info['noofcommits'] )

        metadatahtml = "".join( [
            '<ul>',
            '<b>Author</b>:', username, '<br />',
            '<b>Creation Date</b>:', str( creationdate ), '<br />',
            '<b>Creation Time</b>:', str( creationtime ), '<br />',
            '<b>Version</b>:', version, '<br />',
            '</ul>', '<br />',
        ] )

        return metadatahtml + completeHTML

    def cleanup( self ):
        """
        cleans up all the created files and directories
        """
        shutil.rmtree( self._workDir(), True )

def test( self, ssid, insname ):
    """
    Runs an HTML export for `ssid` and redirects the current response to
    <portal url>/export/<export dir>/course.zip.

    `self` must provide gnowql (used by HTMLExport), portal_url() and
    REQUEST.RESPONSE.
    """
    exporter = HTMLExport( ssid, self, insname )
    exporter.export( ssid )
    print( "course was saved in .. " + exporter.dirName )

    # The export directory is deliberately left in place: the zip has to be
    # copied first, only then may exporter.cleanup() be run.
    zipURL = self.portal_url() + '/export/' + str( exporter.dirName )  + '/course.zip'
    self.REQUEST.RESPONSE.redirect( zipURL )

def test2( self, ssid, insname ):
    """
    Returns the content of the course `ssid` as a list of [ ssid, content ]
    pairs, in the order given by the linear course structure.  Entries whose
    content is None are left out.

    `self` must provide gnowql; `insname` is not used.
    """
    preuri = self.gnowql.geturifromssid( ssid, 'gbobjects' )
    finaluri = preuri + '/' + str( ssid ) + '_' + 'CourseStructure'

    print( finaluri )
    # NOTE(review): pickle.load executes arbitrary code if the file is
    # attacker-controlled; confirm this file is only written by the server.
    fp = open( finaluri, 'rb' )
    try:
        courseStructure = pickle.load( fp )
    finally:
        # close the file even when unpickling fails
        fp.close()

    courseObjInfo = self.gnowql.getinfoFromSSID( ssid, 'gbobjects' )

    print( "course info: %s" % ( courseObjInfo, ) )
    print( "course structure: %s" % ( courseStructure, ) )

    # find out the unique ssids from the course structure
    lstContents = []
    for pair in courseStructure:
        for p in pair:
            if p != '0' and p not in lstContents:
                lstContents.append( p )

    print( "getting info of: %s" % ( lstContents, ) )

    # get all the contents
    contentInfo = self.gnowql.getAllSSIDs( lstContents, 'gbobjects' )

    dictContents = {}
    for key, dictVal in contentInfo.items():
        dictContents[ key ] = dictVal['content']

    o = courseStruct( courseStructure, [] )
    lstSSIDs = o.getLinearStruct()

    print( "Dict: %s" % ( dictContents.keys(), ) )

    lstRet = []
    for item in lstSSIDs:
        if item in dictContents:
            content = dictContents[ item ]
        else:
            # HTMLExport.getHTMLContent indexes the same getAllSSIDs()
            # result with int( ssid ); fall back to that form of the key
            try:
                intKey = int( item )
            except ( TypeError, ValueError ):
                raise KeyError( item )
            content = dictContents[ intKey ]

        if content is None:
            continue

        lstRet.append( [ item, content ] )

    return lstRet

