# -*- coding: utf-8 -*-
# Copyright (C) 2010  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
Functions caching data from `getmediumurl.reader.URLReader` subclasses.
"""


from __future__ import with_statement

from base64 import b32encode
from heapq import heappush, heappop
import os.path
import errno

from getmediumurl.reader import ReaderError, NotFoundError, StaticURLReader
from getmediumurl.utils import LOGGER


def dict_cache(urlreader, max_size=None):
    """Return a function returning a cached instance of `urlreader`.

    The argument of returned function is an URL string, the same
    return value is used for multiple calls with the same argument.
    A `ReaderError` raised by `urlreader` is cached as well and raised
    again on later calls with the same URL.

    The `max_size` argument is the maximum number of URLs cached, or
    `None` (unlimited).  When the limit is exceeded, the URL which was
    cached earliest is removed; using a cached URL again does not
    change its position.

    Raises `ValueError` if `max_size` is neither `None` nor a positive
    `int`.
    """
    if max_size is not None:
        if not isinstance(max_size, int):
            raise ValueError("max_size must be an int")
        if max_size < 1:
            raise ValueError("max_size must be greater than zero")

    # Maps each URL to its entry, a list of: number of the call which
    # created the entry, URL, cached value, and a flag telling whether
    # the value is an exception to raise.
    cache = {}
    # Heap of the same entries; call numbers are unique, so only the
    # first element of an entry is ever compared.
    urls = []
    # Number of calls made, kept in a list so the inner function can
    # update it, and kept out of `cache` so it cannot clash with a key.
    calls = [0]

    def cached_urlreader(url):
        """Return `urlreader` instance using the `url`."""
        calls[0] += 1
        try:
            entry = cache[url]
        except KeyError:
            try:
                entry = [calls[0], url, urlreader(url), False]
            except ReaderError as ex:
                entry = [calls[0], url, ex, True]
            cache[url] = entry
            if max_size is not None:
                heappush(urls, entry)
                if len(urls) > max_size:
                    # the entry with the smallest call number is the
                    # one which was cached first
                    del cache[heappop(urls)[1]]
        if entry[3]:
            raise entry[2]
        return entry[2]

    return cached_urlreader


def _get_cached_data(directory, file_name, default=None):
    """Return content of file named `file_name` if it exists.

    The file is looked up in `directory` and read in binary mode, so
    the content is returned as a byte string.

    Otherwise, return `default`.  An `IOError` other than the file
    being missing is passed to the caller.
    """
    file_name = os.path.join(directory, file_name)
    try:
        with open(file_name, "rb") as cache_file:
            return cache_file.read()
    except IOError as ex:
        if ex.errno != errno.ENOENT:
            # bare raise keeps the traceback of the original error
            raise
        return default


def _write_cache(directory, file_name, content):
    """Write `content` to cache file named `file_name` in `directory`.

    The file is written in binary mode, since `_get_cached_data` reads
    it back in binary mode.  Byte strings are stored unchanged, text
    is encoded as UTF-8 first.
    """
    if not isinstance(content, (bytes, bytearray)):
        # text: ``unicode`` on Python 2, ``str`` on Python 3
        content = content.encode("utf-8")
    with open(os.path.join(directory, file_name), "wb") as cache_file:
        cache_file.write(content)


# Empty byte string used as the default for cache files which are
# decoded after reading: encoding an empty literal gives ``bytes`` on
# Python 3 and a plain ``str`` on Python 2.
_EMPTY = "".encode("ascii")


def directory_cache(urlreader, directory):
    """Return a function returning a cached instance of `urlreader`.

    The argument of returned function is an URL string.  The first
    call for an URL returns the result of `urlreader`; later calls
    return a `StaticURLReader` built from the stored data.

    The `directory` argument specifies the directory where the data
    obtained is stored.  The file names are Base32-encoded URLs with
    single letter prefixes determining which data is stored:

    - ``e``: ``repr`` of the arguments of a `NotFoundError` raised by
      `urlreader`; if this file exists, `NotFoundError` is raised
      with the file's content as its argument;
    - ``c``: the content;
    - ``r``: the URL reported by the reader, if different from the
      one requested;
    - ``t``: the content type, if not empty.

    Exceptions other than `NotFoundError` are not cached.
    """

    def cached_urlreader(url):
        """Return `urlreader` instance using the `url`."""
        encoded_url = url.encode("utf-8")
        encoded = b32encode(encoded_url).decode("ascii")
        # raise exception if cached message found
        message = _get_cached_data(directory, "e" + encoded)
        if message is not None:
            raise NotFoundError(message)
        # get cached data; without an "r" file the target is the
        # requested URL itself
        content = _get_cached_data(directory, "c" + encoded)
        target = _get_cached_data(directory, "r" + encoded,
                                  encoded_url).decode("utf-8")
        content_type = _get_cached_data(directory, "t" + encoded,
                                        _EMPTY).decode("utf-8")
        # return if found
        if (content is not None) or (target != url):
            LOGGER.debug("Use cached %s" % encoded)
            return StaticURLReader(url, content=content, target=target,
                                   content_type=content_type)
        # get the data and write cache
        try:
            data = urlreader(url)
        except NotFoundError as ex:
            # cache the error message
            _write_cache(directory, "e" + encoded, repr(ex.args))
            # bare raise keeps the traceback of the original error
            raise
        # cache the data
        content = data.content
        if content is not None:
            _write_cache(directory, "c" + encoded, content)
        target = data.url
        if target is not None and target != url:
            _write_cache(directory, "r" + encoded, target)
        content_type = data.content_type
        if content_type != u"":
            _write_cache(directory, "t" + encoded, content_type)
        return data

    return cached_urlreader
