#!/usr/bin/env python

""" Provides a Wiki page model.
    A PageConfiguration class is defined here.
    Functions are provided for getting a page, putting a page, checking the
    existence of a page, getting the tip revision number of a page, getting
    the revision date for a page and searching for a string in all the
    pages in an archive.
    RCS is called explicitly through the Python module os,
    and the output is used explicitly with the expectation of specific output
    formats.
    Different versions of RCS may require changes in this module.
    This code was developed using RCS version 5.7
"""
copyright = """
Copyright (C) 2003 Jaime Villate <villate@gnu.org>
 """
license = """
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""

import os, re, time
from wikiup import removeFile, quotate, devNull, Config
try:
    from cStringIO import StringIO
except:    
    from StringIO import StringIO
# Command names/paths of the RCS tools.  Each one is used below as the
# prefix of a shell command line: RCS_PATH for locking/unlocking, RLOG_PATH
# for reading logs, CO_PATH for check-out and CI_PATH for check-in.
# NOTE(review): 'cvs' looks like a configuration section name and the
# remaining arguments like keys inside it -- confirm against wikiup.Config.
RCS_PATH, RLOG_PATH, CO_PATH, CI_PATH = Config('cvs','rcs','rlog','co','ci')
# Prefix for the static HTML copy of each page.  PutPage() concatenates it
# directly with the page name (no separator is inserted), so it presumably
# has to end with a path separator -- verify against the configuration.
HTML_DIR = Config('static','html_dir')

# Exceptions raised by this module.
#
# These used to be plain strings.  String exceptions can no longer be
# raised since Python 2.6 (the raise statement fails with a TypeError), so
# they are real exception classes now.  Statements of the form
# ``raise GetError, "message"`` and handlers of the form
# ``except GetError:`` work unchanged with classes.

class LockError(Exception):
    """Error locking archive."""


class PutError(Exception):
    """Error saving page."""


class GetError(Exception):
    """Error reading page."""

def GetPage(Repository, PageName, Revision):
    """ Returns a tuple (PageText, PageRevision, TipRevisionDateTime)

        PageText is the text of PageName from the Repository.
        The page file will have a ,v suffix, but this should not be
        included in the argument PageName.

        If the revision indicated in Revision exists, it is returned,
        and TipRevisionDateTime is set to a string representing the
        latest modification date of the file.  Otherwise, a new, empty
        page is returned, and TipRevisionDateTime is set to ''.

        The RCS file is locked when the page is checked out, the revision
        number is determined while still locked and the file is then
        unlocked.

        If PageName or Revision have invalid characters, or if the RCS
        file cannot be checkeout, raises GetError.

    """

    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName
    if _hasSuspiciousCharacters(Revision):
        raise GetError, "suspicious revision %s." % Revision

    if PageExists(Repository, PageName):
        archiveName = _archiveName(Repository, PageName)
        if Revision == '':
            f = os.popen(CO_PATH + ' -x,v -p -l %s 2>%s' %
                                   (archiveName, devNull()))
            Revision = GetRevision(Repository, PageName) # get rev while locked
            pageText = f.read()
            error = f.close()
            if error <> None:
                raise GetError, "couldn't check out %s." % (PageName)
        else:
            f = os.popen(CO_PATH + ' -x,v -p -l -r%s %s 2>%s' %
                                   (Revision, archiveName, devNull()))
            pageText = f.read()
            error = f.close()
            if error <> None:
                return ('','','')

        # get date and time of tip revision
        (date, time, contributor) = \
                _getRevisionDateTimeContributor(Repository, PageName)
        #unlock the archive
        os.system(RCS_PATH + ' -M -u %s 2>%s' % (archiveName, devNull()))
        tipDateTime = '%s %s' % (date, time)
        return (pageText, Revision, tipDateTime)
    else:
        # Archive doesn't exist; return an empty page.
        return ('','','')
    
def PutPage(Repository, PageName, PageText, CheckoutTipDateTime,
            Contributor, logMsg, HTMLText):
    """ Checks PageText into file PageName,v in the Repository
    
        The CheckoutTipDateTime is expected to be the
        TipRevisionDateTime returned by GetPage() when the page was
        gotten for editing. Contributor is a string representing the
        entity responsible for the the check-in, and logMsg is a log message.
        
        If PageText cannot be checked in or if PageName has ilegal
        characters , raise PutError.
    """

    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName

    archiveName = _archiveName(Repository, PageName)
    tmpFileName = Repository + os.sep + PageName
    
    f = open(tmpFileName, 'w')
    f.write(PageText)
    f.close()
    if not PageExists(Repository, PageName):
        r = os.system(CI_PATH + ' -t-"%s - initial rev by: %s" %s %s 2>%s' %
                  (logMsg, Contributor, tmpFileName, archiveName, devNull()))
        if r != 0:
            removeFile(tmpFileName)
            raise PutError, "couldn't create archive %s." % (PageName)
    else:
        r = os.system(RCS_PATH + ' -l %s 2>%s' % (archiveName, devNull()))
        if r != 0:
            removeFile(tmpFileName)
            raise PutError, "couldn't lock archive %s." % (PageName)

        (date, time, tipContributor) = \
                _getRevisionDateTimeContributor(Repository, PageName)
        nowTipDateTime = '%s %s' % (date, time)
        if ((Contributor != tipContributor) and \
            (CheckoutTipDateTime != nowTipDateTime)):
            removeFile(tmpFileName)
            raise PutError, "couldn't modify archive %s." % (PageName)

        r = os.system(CI_PATH + ' -m"%s - by: %s" %s %s 2>%s' %
                   (logMsg, Contributor, tmpFileName, archiveName, devNull()))
        if r != 0:
            removeFile(tmpFileName)
            raise PutError, "couldn't update archive %s." % (PageName)
    f = open(HTML_DIR + PageName + '.html', 'w')
    f.write(HTMLText)
    f.close()
    return

def GetLog(Repository, PageName):
    """ Returns a list of edit times and contributors for a page.
    
        The log for PageName in the Repository is returned.
        
        The returned log is a list of tuples: (Revision, RevisionDate,
        Contributor) Where Revision is a revision of the page,
        RevisionDate is the date of that revision, and Contributor
        identifies the entity responsible for the revision.
    """
    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName
    
    revisions = []
    archiveName = _archiveName(Repository, PageName)
    if PageExists(Repository, PageName):
        lines = os.popen(RLOG_PATH + ' %s 2>%s' %
                         (archiveName, devNull())).readlines()
        index = 0
        while index < len(lines):
            m = re.match('initial rev by:\s(.*)', lines[index])
            if m:
                originator = m.group(1)
                break
            index = index + 1    
                
        while index < len(lines):
            m = re.match('revision\s([0-9.]+)', lines[index])
            if m:
                revision = m.group(1)
                index = index + 1           # next line should have date
                m = re.match('date:\s([0-9/]+)', lines[index])
                if m:
                    revisionDate = m.group(1)
                else:
                    revisionDate = ''
                index = index + 1           # next line should have contrib
                m = re.match('by:\s(.*)', lines[index])
                if m:
                    contributor = m.group(1)
                else:
                    m = re.match('Initial revision', lines[index])
                    if m:
                        contributor = originator
                    else:
                        contributor = ''   
                revisions.append(revision, revisionDate, contributor)
            index = index + 1    
    return revisions

def PageExists(Repository, PageName):
    """ Return existence status of a page.
    
        If the page PageName in the Repository exists, a 1 is
        returned. Otherwise, a 0 is returned.
    """

    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName

    if os.path.exists(_archiveName(Repository, PageName)):
        return 1
    else:
        return 0

def PageRevisionDate(Repository, PageName):
    """ Return the check-in date of the head revision of a page.

        PageName is the name of the page in the Repository.
        An empty string is returned for a page that does not exist.
    """

    # The helper yields (date, time, contributor); only the date is wanted.
    tipInfo = _getRevisionDateTimeContributor(Repository, PageName)
    return tipInfo[0]

def GetRevisionContributor(Repository, PageName):
    """ Return the contributor of the head revision of a page.

        PageName is the name of the page in the Repository.
        An empty string is returned for a page that does not exist.
    """
    # The helper yields (date, time, contributor); keep the last item.
    tipInfo = _getRevisionDateTimeContributor(Repository, PageName)
    return tipInfo[2]

 

def Search(Repository, SearchString):
    """ Search all existing pages in an archive for a string.

        The text of the head revisions of all pages in the archive Repository
        are searched for the string SearchString.  The search ignores case.
        SearchString is inserted into a regular expression as it is, so
        regular-expression syntax in it is interpreted.

        A tuple is returned: (ResultsList, PagesFound, PagesSearched)
            where PagesFound and PagesSearched are numerical statistics.
        ResultsList is a list of tuples: (PageName, InString)
            where PageName is the name of the page where SearchString was found,
            and InString is the entire non-space string in which SearchString
            was found embedded.  Only the first hit of each page is reported.
    """
    searchResults = []
    pagesSearched = 0
    pagesFound = 0
    # The expression does not depend on the page; compile it once.
    regExp = re.compile(r'(?i)(\w*' + SearchString + r'\w*)')
    for pageName in AllRCSfiles(Repository):
        # GetPage() returns the page text as one string.
        (pageText, revision, tipDateTime) = GetPage(Repository, pageName, '')

        for line in pageText.splitlines():
            m = regExp.search(line)
            if m:
                searchResults.append((pageName, m.group(0)))
                pagesFound = pagesFound + 1
                break
        pagesSearched = pagesSearched + 1

    return (searchResults, pagesFound, pagesSearched)

                                 
def UpdateChangesPage(Repository, ChangesPageName, ChangedPageName, 
                      Contributor, logMsg, HTMLText=None):
    """ Update a changes page with news of a contribution.

        ChangesPageName is the name of a special page in the
        Repository. This page is updated with news that the page
        ChangedPageName was altered by the entity Contributor; logMsg
        is recorded next to the page name.

        The page is a change log.  A heading line
        'YYYY-MM-DD HH:MM  Contributor' (UTC) is followed by a blank line
        and one tab-indented '* page: message' entry per change.  The new
        entry goes to the top; when the page already starts with the very
        same heading the entry is added under it instead of repeating it.

        HTMLText is the HTML rendering that PutPage() stores next to the
        archive.  When it is omitted, the new page text is escaped and
        wrapped in a <pre> element.
        NOTE(review): that fallback is a plain-text rendering chosen here;
        callers that have the real renderer should pass HTMLText.
    """
    (pageText, revision, tipDateTime) = GetPage(Repository, ChangesPageName, '')
    dateAuthor = time.strftime('%Y-%m-%d %H:%M', time.gmtime(time.time()))
    dateAuthor += '  ' + Contributor + '\n'
    logFile = StringIO(pageText)

    # Read the first line
    firstLine = logFile.readline()
    pieces = [dateAuthor, '\n\t* ', ChangedPageName, ': ', logMsg, '\n']

    # Plain comparison: the heading contains the contributor's name, which
    # must not be interpreted as a regular expression.
    if firstLine == dateAuthor:
        # Skips the blank line under the heading
        logFile.readline()
    else:
        # Keep the previous first line, separated by a blank line
        pieces.append('\n' + firstLine)

    pieces.append(logFile.read())
    newText = ''.join(pieces)

    if HTMLText is None:
        escaped = newText.replace('&', '&amp;')
        escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')
        HTMLText = '<pre>\n' + escaped + '</pre>\n'

    PutPage(Repository, ChangesPageName, newText, tipDateTime,
            Contributor, logMsg, HTMLText)
    return

def GetRevision(Repository, PageName):
    """ Return the head revision of a page file.
    
        PageName is the name of the page in teh Repository.
    """
    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName

    archiveName = _archiveName(Repository, PageName)
    if PageExists(Repository, PageName):
        for line in os.popen(RLOG_PATH + ' -h %s 2>%s' %
                             (archiveName, devNull())).readlines():
            m = re.match('head:\s([0-9.]+)', line)
            if m != None:
                return m.group(1)
    else:
        return '0'          

def _getRevisionDateTimeContributor(Repository, PageName):
    """ Return the check-in date and time of the head revision of a page.
        A tuple (date, time) is returned.
    
        PageName is the name of the page in the Repository.
    """
    if _hasSuspiciousCharacters(PageName):
        raise GetError, "suspicious archive name %s." % PageName

    archiveName = _archiveName(Repository, PageName)
    contributor = ''
    date = ''
    time = ''
    if PageExists(Repository, PageName):
        for line in os.popen(RLOG_PATH + ' -r %s 2>%s' %
                             (archiveName, devNull())).readlines():

            m = re.match('initial rev by:\s(.*)', line)
            if m:
                contributor = m.group(1)
                continue
 
            m = re.match('date:\s([0-9/]+)\s+([0-9:]+)', line)
            if m:
                date = m.group(1)
                time = m.group(2)
                continue

            m = re.match('by:\s(.*)', line)
            if m:
                return(date, time, m.group(1))

    return (date, time, contributor)

def AllRCSfiles(Repository):
    """ Return a list of all RCS files in a repository.

        RCS files have a ,v suffix. The names in the list returned
        will not include that suffix, but will include a path relative
        to the root Repository.  Subdirectories are searched recursively.

        Directory entries whose names contain whitespace are skipped:
        such names are rejected as page names everywhere else in this
        module, so they could not be used anyway.
    """
    found = []
    for entry in os.listdir(Repository):
        if re.search(r'\s', entry):
            continue
        path = Repository + os.sep + entry
        if os.path.isdir(path):
            for name in AllRCSfiles(path):
                found.append(entry + os.sep + name)
            continue
        # Only names that END in ,v are archives ('x,v.bak' is not).
        m = re.match(r'(.+),v$', entry)
        if m is not None:
            found.append(m.group(1))
    return found

def _archiveName(Repository, PageName):
    """ Return the name of the RCS archive file of a page.

        The name is Repository, the path separator, PageName and the
        ,v suffix, passed through wikiup's quotate().
        NOTE(review): quotate() presumably quotes the name for use on a
        command line -- confirm in wikiup.
    """
    archivePath = os.sep.join((Repository, PageName + ',v'))
    return quotate(archivePath)
          
def _hasSuspiciousCharacters(SuspectString):
    """ Return indication if SuspectString has suspicious characters.

        If SuspectString contains any but ASCII alphanumerics, dashes,
        underscores, periods, slashes, or colons, return 1.

        Also return 1 if one of its slash-separated components is '..':
        the strings checked here are used to build file names inside the
        repository, and a '..' component would let them point outside it.

        Otherwise return 0.
    """
    if re.search(r'[^A-Za-z0-9_\-.:/]', SuspectString):
        return 1
    for component in SuspectString.split('/'):
        if component == '..':
            return 1
    return 0

