# -*- coding: utf-8 -*-

#    This file is part of Gnomolicious.
#
#    Gnomolicious is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    Gnomolicious is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Gnomolicious; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#    (C) 2005-2006 Nicolas Évrard <nicoe@nutellux.be>

__revision__ = "$Id: delicious.py,v 1.10 2006/03/21 20:05:26 nicoe Exp $"

import sys
import re
import urllib
import urllib2
import StringIO
import datetime
import logging
import md5
import lxml.etree

from BeautifulSoup import BeautifulSoup

if sys.version_info < (2, 4):
    from sets import Set as set

# Module-wide logger, registered under the application's name.
logger = logging.getLogger('gnomolicious')

# Base URL of the del.icio.us API; endpoint paths such as '/tags/get' are
# appended to it with '%s/...' % API_URL.
API_URL = 'http://del.icio.us/api'
# Prefix -> namespace URI mapping handed to the XPath queries that read the
# RSS/RDF feeds (e.g. 'rss:title', 'dc:date').
NAMESPACES = {'rdf' : 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
              'rss' : 'http://purl.org/rss/1.0/',
              'dc' : 'http://purl.org/dc/elements/1.1/',
              'ephy': 'http://gnome.org/ns/epiphany#',}
# Matches the start of a timestamp shaped like 2006-03-21T20:05:26 and
# captures six groups: year, month, day, hour, minute, second.  The pattern
# is compiled with re.VERBOSE, so the line breaks and indentation inside it
# are ignored.  Anything following the seconds is not examined.
TIME_RE = re.compile('''([0-9]{4,4})-([0-9]{2,2})-([0-9]{2,2})
                        T
                        ([0-9]{2,2}):([0-9]{2,2}):([0-9]{2,2})''', re.VERBOSE)

class Singleton(object):
    """Base class giving every subclass at most one instance.

    The first call to ``cls()`` creates the instance and stores it on the
    class as ``_inst``; later calls return that same object.  Python still
    runs ``__init__`` on the returned object on every call, so subclasses
    must be prepared for repeated initialisation.
    """

    def __new__(cls, *args, **kwargs):
        # Look in the class's own namespace instead of using hasattr():
        # hasattr() also finds an instance cached on a parent class, which
        # made a subclass hand back its parent's instance.
        if '_inst' not in cls.__dict__:
            cls._inst = object.__new__(cls)
        return cls._inst

class RequestHandler(Singleton):
    """Process-wide urllib2 opener with HTTP basic authentication support."""

    def __init__(self):
        # Singleton.__new__ returns the same object each time, but Python
        # calls __init__ on it for every RequestHandler() expression.
        # Without this guard the opener and its auth handler were rebuilt on
        # each call, throwing away every credential registered so far.
        if hasattr(self, 'opener'):
            return
        self.auth_handler = urllib2.HTTPBasicAuthHandler()
        self.opener = urllib2.build_opener(self.auth_handler)

    def addAuthentication(self, username, password):
        """Register the credentials to use for the 'del.icio.us API' realm
        under http://del.icio.us."""
        self.auth_handler.add_password('del.icio.us API', 'http://del.icio.us',
                                       username, password)

def get_url(url, auth_data=None):
    """Fetch ``url`` and return its body wrapped in a StringIO object.

    ``auth_data`` is optional; when given and non-empty it must be a mapping
    with the keys 'user' and 'pass', which are registered with the shared
    RequestHandler before the request is made.  Errors raised by the opener
    are propagated to the caller.
    """
    req = RequestHandler()
    if auth_data:
        req.addAuthentication(auth_data['user'], auth_data['pass'])
    response = req.opener.open(url)
    try:
        # Read everything up front so that the connection can be released
        # right away; callers get a seekable in-memory copy.
        return StringIO.StringIO(response.read())
    finally:
        response.close()

class Post(object):
    """A single bookmark: a title, a WebPage, a time stamp and a set of Tags.

    A Post is built either from an RSS item element (``xml``) or from the
    keyword arguments ``title``, ``href``, ``time`` and ``tags`` (``tags``
    being one whitespace-separated string).  Two posts are equal, and hash
    alike, when they point at the same page.
    """

    def __init__(self, xml=None, **kwargs):
        # Compare with None explicitly: the truth value of an lxml element
        # reflects whether it has children, not whether it was supplied.
        if xml is not None:
            self.__create_from_xml(xml)
        else:
            self.__create_from_attributes(kwargs)

    def __create_from_xml(self, xml):
        """Fill the post from an RSS item element."""
        self.title = xml.xpath('rss:title', namespaces=NAMESPACES)[0].text
        link = xml.xpath('rss:link', namespaces=NAMESPACES)[0].text
        self.href = WebPage(link, self.title)
        self.time = xml.xpath('dc:date', namespaces=NAMESPACES)[0].text
        subjects = xml.xpath('dc:subject', namespaces=NAMESPACES)

        # The del.icio.us feed carries a single dc:subject element holding
        # all the tags separated by whitespace; other feeds may use one
        # element per tag.  An empty element has text None and yields no tag.
        if len(subjects) == 1:
            names = (subjects[0].text or '').split()
        else:
            names = [subject.text for subject in subjects if subject.text]
        self.tags = set([Tag(name, self.href) for name in names])

    def __create_from_attributes(self, attrs):
        """Fill the post from the 'title', 'href', 'time' and 'tags' keys."""
        self.title = attrs['title']
        self.href = WebPage(attrs['href'], self.title)
        self.time = attrs['time']
        self.tags = set([Tag(tag, self.href) for tag in attrs['tags'].split()])

    def __eq__(self, other):
        # Kept consistent with __hash__: a post is identified by its page.
        if not isinstance(other, Post):
            return NotImplemented
        return self.href == other.href

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.href)

class User(object):
    """A del.icio.us account together with its bookmarks and tags.

    ``posts`` reads the account's RSS feed without credentials.
    ``allposts``, ``tags`` and ``do_post`` call the API with ``username``
    and ``password``.
    """

    def __init__(self, uname, password=''):
        self.username = uname
        self.password = password
        # Oldest representable date, so the first access to ``tags`` always
        # triggers a download.
        self.__last_update = datetime.datetime.min
        self.__tags = set()
        self.__allposts = []

    def _auth_data(self):
        """Return the credentials mapping expected by get_url()."""
        return {'user': self.username, 'pass': self.password}

    def _get_url(self):
        return 'http://del.icio.us/%s' % self.username
    url = property(_get_url)

    def _get_rss_url(self):
        return 'http://del.icio.us/rss/%s' % self.username
    rss_url = property(_get_rss_url)

    def __hash__(self):
        return hash(self.username)

    def _get_posts(self):
        """Yield a Post for every item of the account's RSS feed.

        The feed is downloaded again each time the property is read.
        """
        doctree = lxml.etree.parse(get_url(self.rss_url))
        latest_posts = doctree.xpath('/rdf:RDF/rss:item',
                                     namespaces=NAMESPACES)
        for post in latest_posts:
            yield Post(post)
    posts = property(_get_posts)

    def _get_all_posts(self):
        """Return the list of all posts, downloading it while it is empty."""
        if not self.__allposts:
            all_post_url = '%s/posts/all' % API_URL
            doctree = lxml.etree.parse(get_url(all_post_url,
                                               self._auth_data()))
            self.__allposts = [Post(href=p.attrib['href'],
                                    title=p.attrib['description'],
                                    time=p.attrib['time'],
                                    tags=p.attrib['tag'])
                               for p in doctree.xpath('//post')]
        return self.__allposts
    allposts = property(_get_all_posts)

    def _fetch_tags(self):
        """Download the account's tags and remember when that was done."""
        # NOTE(review): assumes the times reported by posts/update are UTC;
        # the stamp stored here is compared against them in _get_tags, so
        # both must use the same clock.  Confirm against the API docs.
        self.__last_update = datetime.datetime.utcnow()
        self.__tags = set()
        doctree = lxml.etree.parse(get_url('%s/tags/get' % API_URL,
                                           self._auth_data()))
        for tag in doctree.xpath('/tags/*'):
            # XML attributes are strings; Tag.count compares the value with
            # an integer, so convert it here.
            self.__tags.add(Tag(tag.attrib['tag'],
                                count=int(tag.attrib['count'])))

    def _get_tags(self):
        """Return the set of the account's tags.

        The API is first asked for the time of the last change; the tags
        are only downloaded again when that time is later than the previous
        download.
        """
        update = lxml.etree.parse(get_url('%s/posts/update' % API_URL,
                                          self._auth_data()))
        date_str = update.xpath('/update[1]')[0].attrib['time']
        update = datetime.datetime(*map(int, TIME_RE.match(date_str).groups()))
        if update > self.__last_update:
            self._fetch_tags()
        return self.__tags
    tags = property(_get_tags)

    def do_post(self, url, desc, ext, tags):
        """Add a bookmark through the API.

        ``url`` is the page to bookmark, ``desc`` its title, ``ext`` the
        extended description and ``tags`` a whitespace-separated string of
        tag names.  When the list of all posts is already cached, the new
        post is also put at its head.
        """
        data = {'url' : url,
                'description' : desc,
                'extended' : ext,
                'tags' : tags}
        posturl = '%s/posts/add?' % API_URL + urllib.urlencode(data)
        get_url(posturl, self._auth_data())
        if self.__allposts:
            # The 'Z' suffix denotes UTC, so take the UTC clock, and drop
            # the microseconds to get the YYYY-MM-DDTHH:MM:SSZ shape.
            now = datetime.datetime.utcnow().replace(microsecond=0)
            # Lists have no prepend() method; insert at index 0 instead.
            self.__allposts.insert(0, Post(href=url, title=desc, tags=tags,
                                           time=now.isoformat() + 'Z'))

class SingletonParametrized(object):
    """Base class keeping one instance per class and per parameter.

    The parameter is compared case-insensitively (it is lower-cased before
    the lookup).  Python runs ``__init__`` on the returned object at every
    call, so the attributes of a shared instance reflect the latest call.
    """

    # Registry shared by every subclass; keyed by (class, lower-cased
    # parameter) so that two subclasses given the same parameter do not
    # receive each other's instances.
    _instances = {}

    def __new__(cls, param, *args, **kwargs):
        """
        >>> class A(SingletonParametrized):
        ...     def __init__(self, p, num):
        ...         self.p = p
        ...         self.num = num

        >>> t1 = A('t', 1)
        >>> t2 = A('t', 2)
        >>> t3 = A('x', 3)

        >>> t1 is t2
        True
        >>> t1.num == t2.num
        True
        >>> t1.num == 2
        True
        >>> t1 is t3
        False
        >>> t3.num
        3
        """
        key = (cls, param.lower())
        if key not in cls._instances:
            cls._instances[key] = object.__new__(cls)
        return cls._instances[key]

class Tag(SingletonParametrized):
    """A tag, shared by every place that names it (case-insensitively).

    ``hrefs`` is the set of pages seen with this tag; ``count`` is the
    larger of the size of that set and the count last passed in.
    """

    def __new__(cls, tagname, href='', count=0):
        # Same parameter names as __init__, so that keyword calls such as
        # Tag('x', href=page) reach both methods.
        inst = SingletonParametrized.__new__(cls, tagname)
        # Only a freshly created instance lacks these attributes; an existing
        # one keeps what it has accumulated.
        if not hasattr(inst, 'hrefs'):
            inst.hrefs = set()
        if not hasattr(inst, '_count'):
            inst._count = 0
        return inst

    def __init__(self, tagname, href='', count=0):
        """
        >>> test = Tag('test')
        >>> t1 = Tag('test', WebPage('http://t1.test.com'))
        >>> te = Tag('test', WebPage('http://t2.test.com'))
        >>> len(test.hrefs)
        2
        """
        self.name = tagname.lower()
        if count:
            # The count may arrive as text taken from an XML attribute.
            self._count = int(count)
        if href:
            self.hrefs.add(href)

    def __repr__(self):
        return '<Tag "%s">' % self.name

    def __hash__(self):
        return hash(self.name)

    def _get_url(self):
        return 'http://del.icio.us/tag/%s' % self.name
    url = property(_get_url)

    def _get_rss_url(self):
        return 'http://del.icio.us/rss/tag/%s' % self.name
    # This property used to be bound to ``url`` as well, which replaced the
    # page URL defined just above.
    rss_url = property(_get_rss_url)

    def _get_count(self):
        return max(len(self.hrefs), self._count)
    count = property(_get_count)

class WebPage(SingletonParametrized):
    """A bookmarked page, shared by every place that names its URL.

    ``tags`` maps the tag names shown on the page's del.icio.us listing to
    their counts; the mapping is cached for ten minutes.
    """

    def __init__(self, url, description=''):
        # The instance is shared and __init__ runs at every WebPage(...)
        # call.  Initialise once, so that a later lookup neither empties
        # the tag cache nor blanks the description, and so that ``url``
        # (used by __hash__) never changes.
        if hasattr(self, 'url'):
            if description:
                self.description = description
            return
        self.url = url
        self.description = description
        self.__last_fetch = datetime.datetime.min
        self.__tags = {}

    def __repr__(self):
        return '<WebPage "%s">' % self.url

    def __eq__(self, other):
        if isinstance(other, (str, unicode)):
            return self.url == other
        elif isinstance(other, WebPage):
            return self.url == other.url
        else:
            return False

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __hash__(self):
        # A page compares equal to its URL string, so it hashes like it.
        return hash(self.url)

    def _get_html(self):
        """Return the HTML of the del.icio.us page listing this URL."""
        url_hash = md5.new(self.url).hexdigest()
        page = get_url('http://del.icio.us/url/%s?settagview=list' % url_hash)
        return page.read()

    def _beautifoulsoup_fetch_tags(self):
        """Rebuild the tag cache by scraping the page's sidebar list."""
        self.__tags = {}
        tree = BeautifulSoup(self._get_html())
        sidebar = tree.fetch('div', {'class' : 'sidebar-inner'})
        if sidebar:
            for li in sidebar[0].ul.ul.fetch('li'):
                self.__tags[li.a.string] = int(li.span.string)

    def _get_tags(self):
        """
        >>> google = WebPage('http://www.google.com/')
        >>> google_tags = google.tags
        >>> len(google_tags) > 0
        True
        """
        age = datetime.datetime.now() - self.__last_fetch
        if age >= datetime.timedelta(minutes=10):
            self._beautifoulsoup_fetch_tags()
            # Stamp only after a successful fetch; a failed download must
            # not leave an empty cache looking fresh for ten minutes.
            self.__last_fetch = datetime.datetime.now()
        return self.__tags
    tags = property(_get_tags)

def _test():
    """Run the doctests of the ``__main__`` module."""
    from doctest import testmod
    testmod()


if __name__ == '__main__':
    # Executed as a script: run the embedded examples.
    _test()
