"""A multi-process safe File-based cache backend with atomic add()

Based on django.core.cache.backends.file_based.
It requires a working fcntl.lockf implementation.

Locking strategy
================

Writing a new cache entry is subject to multi-processing/threading issues.
Readers must never see incomplete files, and multiple writers must never
write into the same file. To accomplish this, a write happens in the following
steps:
    
    - create a file ".new" with O_CREAT|O_EXCL, so that only one writer can
      write at the same time.
      
    - If the open fails, the writer checks whether this is a stale file 
      (should not happen, but better be safe). In this case, it removes the 
      file, but it returns in either case without doing anything, and the other 
      writer "wins". In case of a stale file, this means that the cache entry
      is lost.
      
    - If the open succeeds, the pickles are written to the .new file and the
      .new file is then renamed to the final destination.

Another operation that could cause conflicts is culling, i.e. removing cache
entries when the cache has grown beyond its maximum size. This only removes
the actual cache entries and not the .new files, so it does not conflict with
normal cache writes. 

The original version in django also removed empty
directories--that's pretty hard to do in a safe manner without a global lock,
so it now does not remove empty directories any more.

To prevent multiple processes removing cache entries at the same time, there's
a lock file for culling, {CACHE_ROOT}/cull_lock. Before even checking whether
to cull anything, a process tries a non-blocking fcntl lock on this file. If
the lock fails, there's another process culling, and that's enough. Stale lock
files should not be an issue since the OS releases locks when a process finishes.
"""

import fcntl
import logging
import md5
import os
import time
from errno import EACCES, EAGAIN, EEXIST, ENOENT
from os import O_CREAT, O_EXCL, O_WRONLY

try:
    import cPickle as pickle
except ImportError:
    import pickle

from django.core.cache.backends.base import BaseCache

class CacheClass(BaseCache):
    def __init__(self, dir, params):
        BaseCache.__init__(self, params)
        
        self._logger = logging.getLogger('cache')

        max_entries = params.get('max_entries', 300)
        try:
            self._max_entries = int(max_entries)
        except (ValueError, TypeError):
            self._max_entries = 300

        cull_frequency = params.get('cull_frequency', 3)
        try:
            self._cull_frequency = int(cull_frequency)
        except (ValueError, TypeError):
            self._cull_frequency = 3

        self._dir = dir
        if not os.path.exists(self._dir):
            self._createdir()

    def add(self, key, value, timeout=None):
        self._logger.debug("add: %s" % key)
        if self.has_key(key):
            self._logger.debug("cache hit in add: %s" % key)
            return False
        self.set(key, value, timeout)
        return True
    

    def get(self, key, default=None):
        
        f = None
        fname = self._key_to_file(key)
        try:
            try:
                f = open(fname, 'rb')
                exp = pickle.load(f)
                now = time.time()
                if exp < now:
                    self._logger.debug("timeout in get: %s" % key)
                    f.close()
                    f = None
                    self._delete(fname)
                else:
                    self._logger.debug("cache hit in get: %s" % key)
                    return pickle.load(f)
            except (IOError, OSError), err:
                self._logger.debug("catched exception in get %s: %s" % (key, err))
                # entry does not exist, another process might have removed it
                # due to timeout, or culling might have removed it.
                pass
        finally:
            if f is not None:
                f.close()
        return default

    def set(self, key, value, timeout=None):
        self._cull()
        f = None
        try:
            fname = self._key_to_file(key)
            dirname = os.path.dirname(fname)
            if not os.path.exists(dirname):
                try:
                    os.makedirs(dirname)
                except OSError:
                    if not os.path.exists(dirname):
                        raise  # could not create!
            f = self._open_exclusively(fname + ".new")
            if f is None:
                # another process is creating an entry. 
                # pretend that that process wins the race.
                self._logger.info("skip setting %s, f is None" % key)
                return
            self._write(f, value, timeout)
            f.flush()
            os.rename(fname + '.new', fname)
            self._logger.debug("set %s" % key)
        finally:
            if f is not None:
                f.close()

    def _open_exclusively(self, fname):
        """create the file fname exclusively and return a file object. If 
        the file already exists, return None. Also cleans up stale files.
        """
        dirname = os.path.dirname(fname)
        if not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except OSError:
                if not os.path.exists(dirname):
                    raise  # could not create!
        try:
            fd = os.open(fname, O_CREAT|O_EXCL|O_WRONLY)
            return os.fdopen(fd, "wb")
        except OSError, err:
            if err.errno == EEXIST:
                # find out if the file is stale, though this should not happen ...
                try:
                    mtime = os.stat(fname).st_mtime
                    if time.time() - mtime > 60:
                        self._logger.info("removing stale file: %s" % fname)
                        os.unlink(fname)
                except OSError, err:
                    if err.errno != ENOENT:
                        raise
                return None
            else:
                raise

    def _write(self, f, value, timeout):
        """write the pickle to the file f
        """
        if timeout is None:
            timeout = self.default_time
        now = time.time()
        pickle.dump(now + timeout, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)

    def delete(self, key):
        print "delete %s" % key
        try:
            self._delete(self._key_to_file(key))
        except (IOError, OSError):
            pass

    def _delete(self, fname):
        self._logger.debug("deleting file %s" % fname)
        try:
            os.remove(fname)
        except (IOError, OSError):
            # culling might have removed the entry. Ignore.
            pass

    def has_key(self, key):
        fname = self._key_to_file(key)
        try:
            f = open(fname, 'rb')
            try:
                exp = pickle.load(f)
                now = time.time()
            finally:
                f.close()
            if exp < now:
                self._logger.debug("timeout in has_key: %s" % key)
                self._delete(fname)
                return False
            else:
                return True
        except (IOError, OSError), err:
            # culling might have removed the file, or pickle doesn't work
            self._logger.debug("has_key catched exception for %s: %s" % (key, err))
            return False

    def _cull(self):
        """Remove cache entries if the cache has grown beyond its limit.
        """
        cull_lock = None
        try:
            try:
                cull_lock = open(os.path.join(self._dir, 'cull_lock'), "ab")
                fcntl.lockf(cull_lock.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError, err:
                if err.errno == EAGAIN:
                    self._logger.debug("skip culling since locked")
                    # Could not aquire lock, another process is already culling.
                    return
                else:
                    raise
            if int(self._num_entries) < self._max_entries:
                return
            frequency = self._cull_frequency            
            if self._cull_frequency == 0:
                frequency = 1
            counter = 0
            
            self._logger.debug("culling ...")
            for (root, _, files) in os.walk(self._dir):
                for file in files:
                    try:
                        path = os.path.join(root, file)
                        if (not file == 'cull_lock'
                            and not file.endswith('.new')
                            and os.path.isfile(path)
                           ):
                            if counter % frequency == 0:
                                os.unlink(path)
                            counter += 1
                    except IOError, err:
                        if err.errno == ENOENT:
                            pass  # removed by other process
                        else:
                            raise
            self._logger.info("culled 1/%d of %d files" 
                              % (frequency, counter))
        finally:
            if cull_lock:
                cull_lock.close()

    def _createdir(self):
        try:
            os.makedirs(self._dir)
        except OSError:
            raise EnvironmentError, "Cache directory '%s' does not exist and could not be created'" % self._dir

    def _key_to_file(self, key):
        """
        Convert the filename into an md5 string. We'll turn the first couple
        bits of the path into directory prefixes to be nice to filesystems
        that have problems with large numbers of files in a directory.
        
        Thus, a cache key of "foo" gets turnned into a file named
        ``{cache-dir}ac/bd/18db4cc2f85cedef654fccc4a4d8``.
        """
        path = md5.new(key.encode('utf-8')).hexdigest()
        path = os.path.join(path[:2], path[2:4], path[4:])
        return os.path.join(self._dir, path)

    def _get_num_entries(self):
        count = 0
        for _,_,files in os.walk(self._dir):
            count += len([fname for fname in files if not fname.endswith(".new")])
        return count
    _num_entries = property(_get_num_entries)

