/*
** Copyright (C) 2004-2012 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/

/*
** bagtree.c
**
** Implementation of the bagtree library according to bagtree.h
**
*/

#include <silk/silk.h>

RCSIDENT("$SiLK: bagtree.c 372a8bc31d8a 2012-02-10 21:55:28Z mthomas $");

#include <silk/utils.h>
#include <silk/bagtree.h>


/* LOCAL DEFINES AND TYPEDEFS */

/* Version number to write into the Bag's header.  bagReadHeader()
 * also passes this value to skStreamCheckSilkHeader() when checking
 * the version of a file being read. */
#define RWBAG_FILE_VERSION 3

/* Size of records for that version: one key followed by one counter */
#define RWBAG_RECORD_LENGTH (sizeof(skBagKey_t)+sizeof(skBagCounter_t))


/* Limits enforced by skBagAlloc(): the number of levels in the tree,
 * the total number of key bits across all levels, and the number of
 * key bits that a single level may encode. */
#define MIN_LEVELS 1
#define MAX_LEVELS 32
#define MIN_KEY_SIZE 1
#define MAX_KEY_SIZE 32
#define MIN_LEVEL_BITS 1
#define MAX_LEVEL_BITS 31       /* Size 32 would cause our 1 << size
                                 * operation to fail. */


/* Statistics about a bag's tree, as filled in by bagComputeStats() */
typedef struct skBagStats_st {
    /* count of internal node blocks allocated */
    uint32_t                nodes;
    /* count of leaf blocks allocated */
    uint32_t                leaves;
    /* number of bytes allocated to nodes */
    uint64_t                nodes_size;
    /* number of bytes allocated to leaves */
    uint64_t                leaves_size;
    /* count of entries inserted in the tree; bagComputeStats()
     * increments this together with 'unique_keys', so the two values
     * are always equal */
    uint64_t                keys_inserted;
    /* count of distinct keys whose counter is not the null counter */
    uint64_t                unique_keys;
    /* minimum (non-zero) counter value */
    skBagCounter_t          min_counter;
    /* maximum counter value */
    skBagCounter_t          max_counter;
    /* minimum key inserted */
    skBagKey_t              min_key;
    /* maximum key inserted */
    skBagKey_t              max_key;
} skBagStats_t;


/* A node either points to another node or to a counter.  Each member
 * refers to the first element of a heap-allocated array (a "block"):
 * 'child' to a block of nodes, 'leaf' to a block of counters.  Which
 * member is valid depends on the level of the tree at which the node
 * lives; the node itself carries no tag. */
typedef union skBagNode_un skBagNode_t;
union skBagNode_un {
    skBagNode_t            *child;
    skBagCounter_t         *leaf;
};

/* The SiLK Bag: a fixed-depth tree indexed by successive groups of
 * bits taken from the key, most significant group first. */
struct skBag_st {
    /* the root node of the bag; its block is NULL until the first
     * counter is allocated */
    skBagNode_t             root;

    /* number of levels in the tree */
    skBagLevel_t            levels;

    /* array of length "levels", each entry is the number of bits
     * encoded in a level. */
    skBagLevelsize_t        level_bits[MAX_LEVELS];

    /* array of length "levels", each entry is the number of entries
     * in a block at that level (1 << level_bits[level]). */
    skBagKey_t              level_size[MAX_LEVELS];

    /* array of length "levels", each entry is the bit offset within
     * the key of that level's group of bits: the sum of the
     * level_bits[] values of all deeper levels. */
    skBagLevelsize_t        level_offset[MAX_LEVELS];
};


/* the number of entries in a node/leaf; also used for an index into
 * such a block */
typedef uint32_t skBagBlocksize_t;

/* Definition of the iterator structure */
struct skBagIterator_st {
    /* pointer to the bag over which this iterator was created */
    const skBag_t          *bag;

    /* path of offsets through the tree; the first bag->levels entries
     * are in use, one block index per level */
    skBagBlocksize_t        offset_path[MAX_LEVELS];

    /* if true, iterator skips the first counter in the leaf block, so
     * that we don't re-find the same node with subsequent calls to
     * skBagIteratorNext(). */
    int                     f_skip_counter;
};


/* return number of bits of the key in use at this 'level' */
#define SKBAG_GET_LEVEL_BITS(gls_bag, gls_level)        \
    ((gls_bag)->level_bits[(gls_level)])

/* return the bit-offset into key at this 'level' */
#define SKBAG_GET_LEVEL_OFFSET(glo_bag, glo_level)      \
    ((glo_bag)->level_offset[(glo_level)])

/* return the number of entries in a single node block or leaf block
 * at this 'level' */
#define SKBAG_GET_LEVEL_BLOCKS(glb_bag, glb_level)              \
    ((glb_bag)->level_size[(glb_level)])

/* return the portion of 'key' in-use at this 'level'.  used to index
 * into the block of nodes (or counters) at that level. */
#define SKBAG_GET_KEY_BITS(gkb_key, gkb_bag, gkb_level)                 \
    GET_MASKED_BITS((gkb_key),                                          \
                    SKBAG_GET_LEVEL_OFFSET((gkb_bag), (gkb_level)),     \
                    SKBAG_GET_LEVEL_BITS((gkb_bag), (gkb_level)))

/* return TRUE if the counter pointed at by 'cin_counter' is the NULL
 * counter; the argument is a pointer and is dereferenced */
#define SKBAG_COUNTER_IS_NULL(cin_counter)      \
    (SKBAG_COUNTER_MIN == *(cin_counter))

/* copy the value pointed at by 'cc_value_ptr' into 'cc_counter_ptr';
 * both arguments are pointers and are dereferenced */
#define SKBAG_COUNTER_COPY(cc_counter_ptr, cc_value_ptr)        \
    (*(cc_counter_ptr) = *(cc_value_ptr))


/* Constant counter values: the null counter, and an increment of one */
static const skBagCounter_t bag_counter_null = SKBAG_COUNTER_MIN;
static const skBagCounter_t bag_counter_incr = 1;

/* Externally visible pointers to the constants above, suitable for
 * passing wherever a 'const skBagCounter_t *' is expected */
const skBagCounter_t *skbag_counter_null = &bag_counter_null;
const skBagCounter_t *skbag_counter_incr = &bag_counter_incr;


/* FUNCTION DEFINITIONS */

/*
 *  counter = bagAllocToCounter(bag, key);
 *
 *    Follow 'key' from the root of 'bag' down to its counter slot,
 *    creating every node block and the leaf block that is missing
 *    along the way.  New blocks come from calloc(), so every new
 *    child pointer and counter starts out as zero.
 *
 *    Return the address of the counter slot, or NULL when an
 *    allocation fails.  Blocks created before the failing allocation
 *    stay attached to the tree.
 */
static skBagCounter_t *bagAllocToCounter(
    skBag_t                *bag,
    const skBagKey_t       *key)
{
    skBagNode_t *slot;
    skBagLevel_t depth;
    uint32_t block_len;
    uint32_t idx;

    assert(bag != NULL);
    assert(key != NULL);

    /* 'slot' is the pointer cell that refers to the block for level
     * 'depth'; it starts at the root and moves one level per pass */
    slot = &bag->root;
    depth = 0;
    while (depth < bag->levels - 1) {
        if (NULL == slot->child) {
            /* interior block is absent; create it */
            block_len = SKBAG_GET_LEVEL_BLOCKS(bag, depth);
            slot->child = calloc(block_len, sizeof(*slot->child));
            if (NULL == slot->child) {
                return NULL;
            }
        }

        idx = SKBAG_GET_KEY_BITS(*key, bag, depth);
        slot = &(slot->child[idx]);
        ++depth;
    }

    /* 'slot' now refers to the block of counters for the final level */
    if (NULL == slot->leaf) {
        block_len = SKBAG_GET_LEVEL_BLOCKS(bag, depth);
        slot->leaf = calloc(block_len, sizeof(*slot->leaf));
        if (NULL == slot->leaf) {
            return NULL;
        }
    }

    idx = SKBAG_GET_KEY_BITS(*key, bag, bag->levels - 1);
    return &(slot->leaf[idx]);
}


/*
 *  bagComputeStats(bag, stats);
 *
 *    Given the bag 'bag', fill the 'stats' structure with various
 *    statistics about the bag: the number and byte size of the node
 *    blocks and leaf blocks, the number of non-null counters, and the
 *    range of the keys and counters that are present.
 *
 *    'stats' is zeroed first.  When the bag has no blocks at all,
 *    every field of 'stats' is left as zero.
 *
 *    The tree is walked iteratively with an explicit stack (the
 *    working_* arrays), one entry per level.
 *
 *    NOTE(review): 'leaf_level' is computed as levels - 2, which does
 *    not name a valid level for a bag created with a single level;
 *    confirm that such bags never reach this function.
 */
static void bagComputeStats(
    const skBag_t  *bag,
    skBagStats_t   *stats)
{
    const skBagNode_t    *working_node[MAX_LEVELS];
    uint32_t        working_index[MAX_LEVELS];
    uint32_t        working_max[MAX_LEVELS];
    skBagLevel_t    current_level;
    const skBagNode_t    *current_node;
    uint32_t        current_index;
    uint32_t        current_max;
    skBagLevel_t    leaf_level;
    const skBagCounter_t *counter;
    uint32_t        leaf_count;
    uint32_t        i;
    skBagKey_t      key;

    assert(bag != NULL);
    assert(stats != NULL);

    memset(stats, 0, sizeof(skBagStats_t));

    /* level of the node blocks whose entries point at leaf blocks */
    leaf_level = bag->levels - 2;

    current_level = 0;
    current_node  = bag->root.child;
    current_index = 0;
    current_max   = SKBAG_GET_LEVEL_BLOCKS(bag, current_level);

    if (current_node == NULL) {
        /* empty bag */
        return;
    }

    /* account for the root's block */
    ++stats->nodes;
    stats->nodes_size += (uint64_t)current_max * sizeof(skBagNode_t);

    /* seed the minimums with the largest values so that the first
     * entry found replaces them */
    key = 0;
    stats->min_key = SKBAG_KEY_MAX;
    stats->min_counter = SKBAG_COUNTER_MAX;

    for (;;) {
        if (current_level >= leaf_level) {
            /* we are in a node pointing to leaves; visit every leaf
             * block that hangs off this node block */
            for (; current_index < current_max; ++current_index) {
                if (current_node[current_index].leaf) {

                    /* record this level's part of the key */
                    SET_MASKED_BITS(key, current_index,
                                    SKBAG_GET_LEVEL_OFFSET(bag, current_level),
                                    SKBAG_GET_LEVEL_BITS(bag, current_level));

                    /* temporarily step down to the leaf level so the
                     * level macros describe the leaf block */
                    ++current_level;
                    leaf_count = SKBAG_GET_LEVEL_BLOCKS(bag, current_level);
                    ++stats->leaves;
                    stats->leaves_size
                        += (uint64_t)leaf_count * sizeof(skBagCounter_t);

                    for (i = 0, counter = current_node[current_index].leaf;
                         i < leaf_count;
                         ++i, ++counter)
                    {
                        if (!SKBAG_COUNTER_IS_NULL(counter)) {
                            ++stats->unique_keys;
                            ++stats->keys_inserted;
                            if (*counter > stats->max_counter) {
                                stats->max_counter = *counter;
                            }
                            if (*counter < stats->min_counter) {
                                stats->min_counter = *counter;
                            }

                            /* complete the key with the leaf index */
                            SET_MASKED_BITS(key, i,
                                            SKBAG_GET_LEVEL_OFFSET(bag, current_level),
                                            SKBAG_GET_LEVEL_BITS(bag, current_level));
                            if (key > stats->max_key) {
                                stats->max_key = key;
                            }
                            if (key < stats->min_key) {
                                stats->min_key = key;
                            }
                        }
                    }
                    --current_level;
                }
            }
        } else {
            /* we are in a node pointing to more nodes */
            /* Find the next non-empty node */
            while ((current_index < current_max) &&
                   (current_node[current_index].child == NULL))
            {
                current_index++;
            }
            /* Push information about the current level's state, and
               move to the next level */
            if (current_index < current_max) {
                SET_MASKED_BITS(key, current_index,
                                SKBAG_GET_LEVEL_OFFSET(bag, current_level),
                                SKBAG_GET_LEVEL_BITS(bag, current_level));
                working_index[current_level] = current_index;
                working_node[current_level]  = current_node;
                working_max[current_level]   = current_max;
                current_level++;
                current_node  = current_node[current_index].child;
                current_index = 0;
                current_max   = SKBAG_GET_LEVEL_BLOCKS(bag, current_level);

                /* account for the block just entered.  Its length is
                 * that of its own level, which may differ from the
                 * length of the parent's block. */
                ++stats->nodes;
                stats->nodes_size
                    += (uint64_t)current_max * sizeof(skBagNode_t);
            }
        }

        if (current_index >= current_max) {
            /* this block is exhausted */

            /* Are we finished? */
            if (current_level == 0) {
                break;
            }

            /* Pop the last level and resume after the entry that we
             * descended through */
            current_level--;
            current_node  = working_node[current_level];
            current_index = working_index[current_level] + 1;
            current_max   = working_max[current_level];
        }

    } /* for (;;) */
}


/*
 *  counter = bagGetCounterPointer(bag, key);
 *
 *    Look up 'key' in 'bag' without modifying the tree.  Return the
 *    address of the key's counter slot, or NULL when any block on the
 *    path to that slot has not been allocated.  A non-NULL result may
 *    still refer to a counter whose value is the null counter.
 */
static skBagCounter_t *bagGetCounterPointer(
    const skBag_t          *bag,
    const skBagKey_t       *key)
{
    const skBagNode_t *slot;
    skBagLevel_t depth;
    uint32_t idx;

    assert(bag != NULL);
    assert(key != NULL);

    /* step through the interior levels, giving up at the first
     * missing block */
    slot = &(bag->root);
    depth = 0;
    while (depth < bag->levels - 1) {
        if (NULL == slot->child) {
            return NULL;
        }
        idx = SKBAG_GET_KEY_BITS(*key, bag, depth);
        slot = &(slot->child[idx]);
        ++depth;
    }

    /* 'slot' refers to the leaf block here; 'child' and 'leaf' are
     * members of the same union, so this tests that pointer cell */
    if (NULL == slot->child) {
        return NULL;
    }

    idx = SKBAG_GET_KEY_BITS(*key, bag, bag->levels - 1);
    return &(slot->leaf[idx]);
}


/*
 *  bagNodesFree(bag);
 *
 *    Release every node block and leaf block that belongs to the tree
 *    of 'bag'.  The walk is iterative: the saved_* arrays form a
 *    stack with one entry per level.  The bag structure itself is not
 *    freed, and bag->root is not modified.
 */
static void bagNodesFree(skBag_t *bag)
{
    skBagNode_t  *saved_node[MAX_LEVELS];
    uint32_t      saved_pos[MAX_LEVELS];
    uint32_t      saved_len[MAX_LEVELS];
    skBagNode_t  *node;
    skBagLevel_t  depth;
    skBagLevel_t  last_node_depth;
    uint32_t      pos;
    uint32_t      len;

    assert(bag != NULL);

    node = bag->root.child;
    if (NULL == node) {
        /* nothing was ever allocated */
        return;
    }

    /* level of the node blocks whose entries point at leaf blocks */
    last_node_depth = bag->levels - 2;

    depth = 0;
    pos   = 0;
    len   = SKBAG_GET_LEVEL_BLOCKS(bag, depth);

    for (;;) {
        if (depth < last_node_depth) {
            /* entries of this block refer to further node blocks;
             * advance to the next entry that has one */
            for ( ; pos < len; ++pos) {
                if (node[pos].child != NULL) {
                    break;
                }
            }
            if (pos < len) {
                /* remember where we are and descend */
                saved_pos[depth]  = pos;
                saved_node[depth] = node;
                saved_len[depth]  = len;
                ++depth;
                node = node[pos].child;
                pos  = 0;
                len  = SKBAG_GET_LEVEL_BLOCKS(bag, depth);
            }
        } else {
            /* entries of this block refer to leaf blocks; release
             * them all (free() ignores NULL) */
            while (pos < len) {
                free(node[pos].leaf);
                ++pos;
            }
        }

        if (pos >= len) {
            /* every entry of this block was handled; release it */
            free(node);

            if (0 == depth) {
                /* that was the root's block */
                break;
            }

            /* return to the parent and continue with the entry after
             * the one we descended through */
            --depth;
            node = saved_node[depth];
            pos  = saved_pos[depth] + 1;
            len  = saved_len[depth];
        }
    }
}


/*
 *  status = bagReadHeader(stream, &bag_version, &swap_flag);
 *
 *    Read and validate the SiLK file header of the bag on 'stream'.
 *    On success, store the record version of the file in
 *    '*bag_version', store 1 in '*swap_flag' when the file is not in
 *    native byte order (0 otherwise), and return SKBAG_OK.
 *
 *    Return SKBAG_ERR_READ when the header cannot be read, when it
 *    fails the file-type/version check, or when a file older than
 *    version 2 claims to be compressed.
 */
static skBagErr_t bagReadHeader(
    skstream_t             *stream_in,
    fileVersion_t          *bag_version,
    int                    *swap_flag)
{
    sk_file_header_t *silk_hdr;
    int status;

    assert(stream_in);
    assert(bag_version);
    assert(swap_flag);

    status = skStreamReadSilkHeader(stream_in, &silk_hdr);
    if (status != 0) {
        skStreamPrintLastErr(stream_in, status, &skAppPrintErr);
        return SKBAG_ERR_READ;
    }

    /* the check reports problems itself through skAppPrintErr */
    status = skStreamCheckSilkHeader(stream_in, FT_RWBAG, 1,
                                     RWBAG_FILE_VERSION, &skAppPrintErr);
    if (status != 0) {
        return SKBAG_ERR_READ;
    }

    *bag_version = skHeaderGetRecordVersion(silk_hdr);
    if (*bag_version < 2) {
        if (skHeaderGetCompressionMethod(silk_hdr) != SK_COMPMETHOD_NONE) {
            skAppPrintErr("Bag files prior to v2 do not support compression");
            return SKBAG_ERR_READ;
        }
    }

    *swap_flag = (skHeaderIsNativeByteOrder(silk_hdr) ? 0 : 1);

    return SKBAG_OK;
}


/*
 *  status = bagReadAddToCounter(key, counter, bag);
 *
 *    Callback function used by skBagAddFromStream().  For 'key' in
 *    'bag', add 'counter' to its counter value.
 */
static skBagErr_t bagReadAddToCounter(
    const skBagKey_t       *key,
    const skBagCounter_t   *counter,
    void                   *v_bag)
{
    return skBagAddToCounter((skBag_t*)v_bag, key, counter);
}


/*
 *  status = bagReadSetCounter(key, counter, bag);
 *
 *    Callback function used by skBagRead().  Set the 'key' to
 *    'counter' in 'bag'.
 */
static skBagErr_t bagReadSetCounter(
    const skBagKey_t       *key,
    const skBagCounter_t   *counter,
    void                   *v_bag)
{
    return skBagSetCounter((skBag_t*)v_bag, key, counter);
}


/*
 *  counter = bagTraverseSubtree(iter, subtree, lvl, f_use_iterator_start);
 *
 *    Search the block that 'subtree' refers to, whose level in the
 *    bag is 'lvl', for the next counter that is not the null counter.
 *    When 'lvl' is not the last level, the function recurses into
 *    each non-NULL child; at the last level it scans the counters of
 *    the leaf block.
 *
 *    'f_use_iterator_start' is a boolean value.  If true, the scan of
 *    this block starts at iter->offset_path[lvl]; otherwise it starts
 *    at index 0.  The flag is handed to at most one recursive call
 *    per block and is cleared afterward; that way, the leftmost path
 *    of the tree uses the saved offsets, but once beyond that, whole
 *    blocks are searched.
 *
 *    If the iterator has f_skip_counter set to 1 when a leaf block is
 *    scanned, the first counter of that scan is skipped---whether
 *    empty or not---and the flag is cleared.
 *
 *    Returns a pointer to the next counter found, or NULL if no
 *    counter was found.
 *
 *    When a counter is found, iter->offset_path[] is set at every
 *    level on the way back up so that it describes the path to that
 *    counter, and f_skip_counter is set so that the same counter is
 *    not found again on the next call.  When no counter is found and
 *    'lvl' is 0, the iterator is reset.
 */
static skBagCounter_t *bagTraverseSubtree(
    skBagIterator_t        *iter,
    const skBagNode_t      *subtree,
    skBagLevel_t            lvl,
    int                     f_use_iterator_start)
{
    skBagBlocksize_t block_cur;
    skBagBlocksize_t block_size;
    skBagCounter_t *counter = NULL;

    /* check input */
    assert(iter != NULL);
    assert(iter->bag != NULL);
    assert(subtree != NULL);
    assert(lvl < iter->bag->levels);

    /* no block hangs off this node, so there is nothing to search */
    if (subtree->child == NULL) {
        return NULL;
    }

    /* choose the index at which to begin scanning this block */
    if (f_use_iterator_start) {
        block_cur = iter->offset_path[lvl];
    } else {
        block_cur = (skBagBlocksize_t) 0;
    }
    block_size = SKBAG_GET_LEVEL_BLOCKS(iter->bag, lvl);

    /*
     * if the starting index is at (or past!) the end of the block,
     * the counter will not be found in this subtree
     */
    if (block_cur >= block_size) {
        return NULL;
    }

    if (lvl < iter->bag->levels - 1) {
        /*
         * if we are not at the bottom level of nodes, for each
         * non-null child pointer, recurse one level deeper.
         */
        for (; block_cur < block_size; ++block_cur) {
            if (subtree->child[block_cur].child != NULL) {
                /*
                 * a subtree exists at this offset; search it.  only
                 * the first recursive call made from this block may
                 * use the iterator's saved offsets.
                 */
                counter = bagTraverseSubtree(iter,
                                             &subtree->child[block_cur],
                                             lvl + 1, f_use_iterator_start);
                f_use_iterator_start = 0;
            }

            /*
             * at this point, counter points to a found counter, or
             * still is NULL.  if it points to a valid counter, set
             * the iterator path for the current level, and return to
             * the next shallow level.
             */
            if (counter != NULL) {
                iter->offset_path[lvl] = block_cur;
                return counter;
            }
        }
    } else {
        /* at the leaf level, so check for non-zero counters */

        /*
         * if we follow the path straight down, we will re-find the
         * same counter we found on the last iteration.  to avoid
         * that, we skip the first counter on that straight path.
         */
        if (iter->f_skip_counter == 1) {
            ++block_cur;
            iter->f_skip_counter = 0;
        }

        for (; block_cur < block_size; ++block_cur) {
            if ( !SKBAG_COUNTER_IS_NULL(&subtree->leaf[block_cur])) {
                /* we found a counter. */
                iter->offset_path[lvl] = block_cur;
                /* this branch is only reached when 'lvl' is the last
                 * level, so the test below is expected to hold */
                if (lvl == iter->bag->levels - 1) {
                    iter->f_skip_counter = 1;
                }
                return &subtree->leaf[block_cur];
            }
        }
    }

    /*
     * no counter found.  if we are at the most shallow level, reset
     * the iterator (so subsequent calls will re-start at the
     * beginning of the tree.)
     */
    if (lvl == 0) {
        skBagIteratorReset(iter);
    }
    return NULL;
}





/* Read the bag stored on 'stream_in' and add each of its counters to
 * the counter for the same key in 'bag'.  Returns SKBAG_ERR_INPUT
 * when either argument is NULL; otherwise returns the status from
 * skBagProcessStream(). */
skBagErr_t skBagAddFromStream(
    skBag_t                *bag,
    skstream_t             *stream_in)
{
    if (NULL == bag) {
        return SKBAG_ERR_INPUT;
    }
    if (NULL == stream_in) {
        return SKBAG_ERR_INPUT;
    }

    return skBagProcessStream(stream_in, bag, &bagReadAddToCounter);
}


/* Add '*value_added' to the counter for '*key' in 'bag', creating the
 * counter (and any missing blocks) when it does not exist yet.
 *
 * Returns SKBAG_ERR_INPUT when an argument is NULL, SKBAG_ERR_MEMORY
 * when a block cannot be allocated, and SKBAG_ERR_OP_BOUNDS---leaving
 * the counter unchanged---when the sum would exceed
 * SKBAG_COUNTER_MAX. */
skBagErr_t skBagAddToCounter(
    skBag_t                *bag,
    const skBagKey_t       *key,
    const skBagCounter_t   *value_added)
{
    skBagCounter_t *slot;

    if (NULL == bag || NULL == key || NULL == value_added) {
        return SKBAG_ERR_INPUT;
    }

    /* find the counter's slot, building the path to it if needed */
    slot = bagAllocToCounter(bag, key);
    if (NULL == slot) {
        return SKBAG_ERR_MEMORY;
    }

    /* only add when (*slot + *value_added) stays within the maximum;
     * the comparison is arranged so that it cannot overflow itself */
    if (*slot <= (SKBAG_COUNTER_MAX - *value_added)) {
        *slot += *value_added;
        return SKBAG_OK;
    }

    return SKBAG_ERR_OP_BOUNDS;
}


/* Create an empty bag whose tree has 'levels' levels, where level i
 * encodes level_sizes[i] bits of the key, and store it in '*bag'.
 *
 * Returns SKBAG_ERR_INPUT when 'bag' or 'level_sizes' is NULL, when
 * 'levels' is outside MIN_LEVELS..MAX_LEVELS, when any level size is
 * outside MIN_LEVEL_BITS..MAX_LEVEL_BITS, or when the level sizes do
 * not sum to a value in MIN_KEY_SIZE..MAX_KEY_SIZE.  Returns
 * SKBAG_ERR_MEMORY when the bag cannot be allocated.  '*bag' is only
 * written on success. */
skBagErr_t skBagAlloc(
    skBag_t               **bag,
    skBagLevel_t            levels,
    const skBagLevelsize_t *level_sizes)
{
    /* wide enough for MAX_LEVELS * MAX_LEVEL_BITS, so that an
     * over-long key cannot wrap around and pass the range check */
    unsigned int key_size = 0;
    skBag_t *new_bag;
    skBagLevel_t lvl;
    skBagLevelsize_t offset;

    /* check inputs */
    if (bag == NULL || level_sizes == NULL) {
        /* must have somewhere to store the bag and a valid array of
         * level sizes */
        return SKBAG_ERR_INPUT;
    }

    if (levels < MIN_LEVELS || levels > MAX_LEVELS) {
        /* bagtree must have at least one level */
        return SKBAG_ERR_INPUT;
    }

    /* each level must contain at least 1 bit of key. */
    for (lvl = 0; lvl < levels; ++lvl) {
        if (level_sizes[lvl] < MIN_LEVEL_BITS ||
            level_sizes[lvl] > MAX_LEVEL_BITS)
        {
            return SKBAG_ERR_INPUT;
        }
        key_size += level_sizes[lvl];
    }

    /* the tree can encode at most a 32b key */
    if (key_size < MIN_KEY_SIZE || key_size > MAX_KEY_SIZE) {
        return SKBAG_ERR_INPUT;
    }

    /* allocate the bag; calloc() leaves the root without a block */
    new_bag = calloc(1, sizeof(*new_bag));
    if (new_bag == NULL) {
        return SKBAG_ERR_MEMORY;
    }

    new_bag->root.child = NULL;
    new_bag->levels = levels;

    /*
     * go through the level size array again, this time recording the
     * geometry of each level.  'offset' counts down from the key
     * width, so level 0 covers the most significant bits of the key
     * and the last level ends at bit 0.
     */
    offset = (skBagLevelsize_t)key_size;
    for (lvl = 0; lvl < levels; ++lvl) {
        new_bag->level_bits[lvl] = level_sizes[lvl];
        new_bag->level_size[lvl] = 1u << level_sizes[lvl];
        offset -= level_sizes[lvl];
        new_bag->level_offset[lvl] = offset;
    }

    /* set pointer and return OK */
    *bag = new_bag;

    return SKBAG_OK;
}


/* Create a bag with the default geometry: four levels that encode
 * 9, 9, 9, and 5 bits of the key.  Returns the status from
 * skBagAlloc(). */
skBagErr_t skBagCreate(
    skBag_t               **bag)
{
    const skBagLevelsize_t default_bits[] = { 9, 9, 9, 5 };
    const skBagLevel_t default_levels
        = (skBagLevel_t)(sizeof(default_bits) / sizeof(default_bits[0]));

    return skBagAlloc(bag, default_levels, default_bits);
}


/* Return the number of keys in 'bag' whose counter is not the null
 * counter.  The whole tree is walked on every call. */
uint64_t skBagCountKeys(
    const skBag_t          *bag)
{
    skBagStats_t tree_stats;
    uint64_t key_count;

    bagComputeStats(bag, &tree_stats);
    key_count = tree_stats.unique_keys;

    return key_count;
}


/* Destroy 'bag': release every block of its tree and then the bag
 * structure itself.  Returns SKBAG_ERR_INPUT when 'bag' is NULL and
 * SKBAG_OK otherwise. */
skBagErr_t skBagFree(
    skBag_t                *bag)
{
    if (NULL != bag) {
        /* the tree first, then the structure that owns it */
        bagNodesFree(bag);
        free(bag);
        return SKBAG_OK;
    }

    return SKBAG_ERR_INPUT;
}


/* Copy the counter stored for '*key' in 'bag' into '*out_value'.  A
 * key that is not present in the bag yields the null counter.
 *
 * Returns SKBAG_ERR_INPUT when 'out_value' is NULL (nothing is
 * written), or when 'bag' or 'key' is NULL (in which case
 * '*out_value' is set to the null counter).  Returns SKBAG_OK
 * otherwise. */
skBagErr_t skBagGetCounter(
    const skBag_t          *bag,
    const skBagKey_t       *key,
    skBagCounter_t         *out_value)
{
    const skBagCounter_t *counter;

    if (out_value == NULL) {
        /* there is nowhere to store a result */
        return SKBAG_ERR_INPUT;
    }

    if (bag == NULL || key == NULL) {
        SKBAG_COUNTER_COPY(out_value, skbag_counter_null);
        return SKBAG_ERR_INPUT;
    }

    /* the lookup returns NULL for an empty bag and for any key whose
     * path has not been allocated; report the null counter for those */
    counter = bagGetCounterPointer(bag, key);
    if (counter == NULL) {
        counter = skbag_counter_null;
    }

    SKBAG_COUNTER_COPY(out_value, counter);
    return SKBAG_OK;
}


/* Allocate an iterator over 'bag', position it at the start of the
 * bag, and store it in '*iter'.  Returns SKBAG_ERR_INPUT when either
 * argument is NULL and SKBAG_ERR_MEMORY when the allocation fails (in
 * which case '*iter' is set to NULL). */
skBagErr_t skBagIteratorCreate(
    const skBag_t          *bag,
    skBagIterator_t       **iter)
{
    skBagIterator_t *new_iter;

    /* check inputs */
    if (NULL == bag || NULL == iter) {
        return SKBAG_ERR_INPUT;
    }

    /* allocate iterator; the caller sees the result either way */
    new_iter = malloc(sizeof(*new_iter));
    *iter = new_iter;
    if (NULL == new_iter) {
        return SKBAG_ERR_MEMORY;
    }

    new_iter->bag = bag;
    skBagIteratorReset(new_iter);

    return SKBAG_OK;
}


/* Release the memory held by 'iter'.  The bag it iterated over is not
 * touched.  Returns SKBAG_ERR_INPUT when 'iter' is NULL. */
skBagErr_t skBagIteratorDestroy(
    skBagIterator_t        *iter)
{
    if (NULL != iter) {
        free(iter);
        return SKBAG_OK;
    }

    return SKBAG_ERR_INPUT;
}


/* Advance 'iter' to the next key in its bag whose counter is not the
 * null counter, and store that key in '*key' and its counter in
 * '*counter'.
 *
 * Returns SKBAG_ERR_INPUT when any argument is NULL; the iterator is
 * not advanced in that case.  Returns SKBAG_ERR_KEY_NOT_FOUND when no
 * further entry exists; '*key' and '*counter' are not written in that
 * case.  Returns SKBAG_OK when an entry was produced. */
skBagErr_t skBagIteratorNext(
    skBagIterator_t        *iter,
    skBagKey_t             *key,
    skBagCounter_t         *counter)
{
    skBagCounter_t *next_counter;
    skBagLevel_t lvl;

    /* check input before touching the iterator, so that a bad call
     * does not consume an entry */
    if (iter == NULL || key == NULL || counter == NULL) {
        return SKBAG_ERR_INPUT;
    }
    assert(iter->bag != NULL);

    next_counter = bagTraverseSubtree(iter, &iter->bag->root,
                                      (skBagLevel_t)0, 1);
    if (next_counter == NULL) {
        /* there was no next key. */
        return SKBAG_ERR_KEY_NOT_FOUND;
    }

    /* counter found */
    SKBAG_COUNTER_COPY(counter, next_counter);

    /* generate key value from path: each level contributes its block
     * index, shifted to that level's position within the key */
    *key = 0;
    for (lvl = 0; lvl < iter->bag->levels; ++lvl) {
        *key |= (iter->offset_path[lvl]
                 << SKBAG_GET_LEVEL_OFFSET(iter->bag, lvl));
    }

    return SKBAG_OK;
}


/* Move 'iter' back to the start of its bag, so that the next call to
 * skBagIteratorNext() searches from the first key.  Returns
 * SKBAG_ERR_INPUT when 'iter' is NULL. */
skBagErr_t skBagIteratorReset(
    skBagIterator_t        *iter)
{
    if (NULL == iter) {
        return SKBAG_ERR_INPUT;
    }

    /* start from index 0 at every level */
    memset(iter->offset_path, 0, sizeof(iter->offset_path));

    /* the first counter has not been returned yet; do not skip it */
    iter->f_skip_counter = 0;

    return SKBAG_OK;
}


/* Create a bag from the file named 'filename' and store it in '*bag';
 * a wrapper that opens a stream and hands it to skBagRead().
 *
 * Returns SKBAG_ERR_INPUT when an argument is NULL, SKBAG_ERR_READ
 * when the stream cannot be created, bound, or opened (after printing
 * the stream's error), and the status from skBagRead() otherwise. */
skBagErr_t skBagLoad(skBag_t **bag, const char *filename)
{
    skstream_t *stream = NULL;
    skBagErr_t err;
    int rv;

    if (NULL == bag || NULL == filename) {
        return SKBAG_ERR_INPUT;
    }

    /* each step runs only when the previous one succeeded */
    rv = skStreamCreate(&stream, SK_IO_READ, SK_CONTENT_SILK);
    if (0 == rv) {
        rv = skStreamBind(stream, filename);
    }
    if (0 == rv) {
        rv = skStreamOpen(stream);
    }

    if (rv != 0) {
        skStreamPrintLastErr(stream, rv, &skAppPrintErr);
        err = SKBAG_ERR_READ;
    } else {
        err = skBagRead(bag, stream);
    }

    /* the stream is destroyed on every path; its status is ignored */
    (void)skStreamDestroy(&stream);
    return err;
}


/* Print statistics about the tree of 'bag' to 'stream_out': the
 * number and size of the node and leaf blocks, the number of keys,
 * the fraction of allocated counter slots that are in use, and the
 * range of the keys and counters.
 *
 * Returns SKBAG_ERR_INPUT when either argument is NULL and SKBAG_OK
 * otherwise. */
skBagErr_t skBagPrintTreeStats(
    const skBag_t          *bag,
    skstream_t             *stream_out)
{
    uint64_t total_counters;
    double density;
    skBagStats_t stats;

    if (bag == NULL || stream_out == NULL) {
        return SKBAG_ERR_INPUT;
    }

    bagComputeStats(bag, &stats);

    skStreamPrint(stream_out, ("%18s:  %" PRIu32 " (%" PRIu64 " bytes)\n"),
                  "nodes allocated",
                  stats.nodes, stats.nodes_size);

    skStreamPrint(stream_out, ("%18s:  %" PRIu32 " (%" PRIu64 " bytes)\n"),
                  "leaves allocated",
                  stats.leaves, stats.leaves_size);

    skStreamPrint(stream_out, ("%18s:  %" PRIu64 " (%" PRIu64 " unique)\n"),
                  "keys inserted",
                  stats.keys_inserted, stats.unique_keys);

    /* number of counter slots in all allocated leaf blocks; computed
     * in 64 bits since the product can exceed 32 bits */
    total_counters = ((uint64_t)stats.leaves *
                      (uint64_t)SKBAG_GET_LEVEL_BLOCKS(bag,bag->levels-1));

    /* an empty bag has no slots; report zero instead of dividing by
     * zero */
    density = 0.0;
    if (total_counters != 0) {
        density = (100.0 * (double)stats.unique_keys
                   / (double)total_counters);
    }
    skStreamPrint(stream_out, "%18s:  %.02f%%\n",
                  "counter density",
                  density);

    skStreamPrint(stream_out, ("%18s:  %" PRIu32 " -> %" PRIu32 "\n"),
                  "key range",
                  stats.min_key, stats.max_key);

    skStreamPrint(stream_out, ("%18s:  %" PRIu64 " -> %" PRIu64 "\n"),
                  "counter range",
                  stats.min_counter, stats.max_counter);

    return SKBAG_OK;
}




/*
 *  status = skBagProcessStream(stream_in, cb_data, cb_func);
 *
 *    Read a binary bag file from 'stream_in', and process each
 *    key-counter pair with the function pointed to by 'cb_func',
 *    passing 'cb_data' as the final argument to the 'cb_func'.
 *
 *    Version 1 files store 32-bit counters; versions 2 and 3 store
 *    64-bit counters.  Keys and counters are byte-swapped when the
 *    file is not in native byte order.
 *
 *    Returns SKBAG_ERR_INPUT when 'stream_in' or 'cb_func' is NULL.
 *    Returns SKBAG_ERR_READ when the header is invalid, when a read
 *    fails, or when the file ends in the middle of a key or counter.
 *    When 'cb_func' returns a value other than SKBAG_OK, processing
 *    stops and that value is returned.
 */
skBagErr_t skBagProcessStream(
    skstream_t             *stream_in,
    void                   *cb_data,
    skBagStreamFunc_t       cb_func)
{
    int f_swap_byte_order;
    fileVersion_t bag_version;
    skBagErr_t err;
    skBagKey_t key;
    uint32_t counter_32 = 0;
    skBagCounter_t counter = 0;
    size_t counter_len;
    ssize_t recs;

    if (stream_in == NULL || cb_func == NULL) {
        return SKBAG_ERR_INPUT;
    }

    /* read header */
    err = bagReadHeader(stream_in, &bag_version, &f_swap_byte_order);
    if (err != SKBAG_OK) {
        return err;
    }

    /* read key/counter pairs */
    while ((recs = skStreamRead(stream_in, &key, sizeof(skBagKey_t))) > 0) {
        if ((size_t)recs != sizeof(skBagKey_t)) {
            /* the file ended part-way through a key */
            skAppPrintErr("Could not read complete key");
            return SKBAG_ERR_READ;
        }
        if (f_swap_byte_order) {
            key = BSWAP32(key);
        }

        /* read the raw counter; its width depends on the version */
        counter_len = 0;
        switch (bag_version) {
          case 1:
            /* version 1 had 32 bit counters */
            counter_len = sizeof(uint32_t);
            recs = skStreamRead(stream_in, &counter_32, counter_len);
            break;

          case 2:
          case 3:
            /* versions 2 and 3 are identical and have 64 bit
             * counters; v3 supports compression on write */
            counter_len = sizeof(skBagCounter_t);
            recs = skStreamRead(stream_in, &counter, counter_len);
            break;

          case 0:
          default:
            skAbortBadCase(bag_version);
        }

        /* anything other than a complete counter is an error; check
         * before the value is converted so that a partially filled
         * buffer is never used */
        if (recs <= 0 || (size_t)recs != counter_len) {
            skAppPrintErr("Could not read counter for key");
            if (recs == -1) {
                skStreamPrintLastErr(stream_in, recs, &skAppPrintErr);
            }
            return SKBAG_ERR_READ;
        }

        /* convert the raw counter to a native skBagCounter_t */
        if (1 == bag_version) {
            if (f_swap_byte_order) {
                counter = (skBagCounter_t)BSWAP32(counter_32);
            } else {
                counter = (skBagCounter_t)counter_32;
            }
        } else if (f_swap_byte_order) {
            counter = BSWAP64(counter);
        }

        err = cb_func(&key, &counter, cb_data);
        if (err != SKBAG_OK) {
            skAppPrintErr("Error %u processing key-counter pair",
                          err);
            return err;
        }
    }

    /* the loop ends on end-of-file (0) or on a read error (-1) */
    if (recs == -1) {
        skStreamPrintLastErr(stream_in, recs, &skAppPrintErr);
        return SKBAG_ERR_READ;
    }

    return SKBAG_OK;
}


/* Create a bag with the default geometry, fill it with the contents
 * of the bag file on 'stream_in', and store it in '*bag'.
 *
 * Returns SKBAG_ERR_INPUT when either argument is NULL, the status
 * from skBagCreate() when the bag cannot be created, and the status
 * from skBagProcessStream() otherwise.  When reading fails, the
 * partially filled bag is destroyed and '*bag' is set to NULL, so the
 * caller has nothing to free. */
skBagErr_t skBagRead(
    skBag_t               **bag,
    skstream_t             *stream_in)
{
    skBagErr_t err;

    if (bag == NULL || stream_in == NULL) {
        return SKBAG_ERR_INPUT;
    }

    /* create bag */
    err = skBagCreate(bag);
    if (err != SKBAG_OK) {
        return err;
    }

    err = skBagProcessStream(stream_in, *bag, &bagReadSetCounter);
    if (err != SKBAG_OK) {
        /* do not leak the bag that was created above */
        skBagFree(*bag);
        *bag = NULL;
    }

    return err;
}


/* Write 'bag' to the file named 'filename'; a wrapper that opens a
 * stream and hands it to skBagWrite().
 *
 * Returns SKBAG_ERR_INPUT when an argument is NULL.  Returns
 * SKBAG_ERR_OUTPUT when the stream cannot be created, bound, opened,
 * or closed (after printing the stream's error).  Otherwise returns
 * the status from skBagWrite(). */
skBagErr_t skBagSave(const skBag_t *bag, const char *filename)
{
    skstream_t *stream = NULL;
    skBagErr_t err;
    int rv;

    if (NULL == bag || NULL == filename) {
        return SKBAG_ERR_INPUT;
    }

    /* each step runs only when the previous one succeeded */
    rv = skStreamCreate(&stream, SK_IO_WRITE, SK_CONTENT_SILK);
    if (0 == rv) {
        rv = skStreamBind(stream, filename);
    }
    if (0 == rv) {
        rv = skStreamOpen(stream);
    }

    if (rv != 0) {
        skStreamPrintLastErr(stream, rv, &skAppPrintErr);
        err = SKBAG_ERR_OUTPUT;
    } else {
        err = skBagWrite(bag, stream);

        /* a failure to close replaces the status of the write */
        rv = skStreamClose(stream);
        if (rv != 0) {
            skStreamPrintLastErr(stream, rv, &skAppPrintErr);
            err = SKBAG_ERR_OUTPUT;
        }
    }

    /* the stream is destroyed on every path; its status is ignored */
    (void)skStreamDestroy(&stream);
    return err;
}


/* Store '*value' as the counter for '*key' in 'bag'.
 *
 * A non-null value creates the counter (and any missing blocks) when
 * needed.  The null value never allocates: it overwrites the counter
 * when one exists and is otherwise a no-op.
 *
 * Returns SKBAG_ERR_INPUT when an argument is NULL and
 * SKBAG_ERR_MEMORY when a block cannot be allocated. */
skBagErr_t skBagSetCounter(
    skBag_t                *bag,
    const skBagKey_t       *key,
    const skBagCounter_t   *value)
{
    skBagCounter_t *slot;

    if (NULL == bag || NULL == key || NULL == value) {
        return SKBAG_ERR_INPUT;
    }

    if (!SKBAG_COUNTER_IS_NULL(value)) {
        /* a real value: find the slot, building the path if needed */
        slot = bagAllocToCounter(bag, key);
        if (NULL == slot) {
            return SKBAG_ERR_MEMORY;
        }
        SKBAG_COUNTER_COPY(slot, value);
        return SKBAG_OK;
    }

    /* the null value: only clear a slot that already exists */
    slot = bagGetCounterPointer(bag, key);
    if (NULL != slot) {
        SKBAG_COUNTER_COPY(slot, value);
    }
    return SKBAG_OK;
}


/*
 *  message = skBagStrerror(err_code);
 *
 *    Return a human-readable description of 'err_code'.
 *
 *    For the known error codes the result is a string literal.  For
 *    any other value the text is formatted into a static buffer
 *    local to this function, so that result is overwritten by the
 *    next call that is given an unrecognized code.
 */
const char *skBagStrerror(
    skBagErr_t              err_code)
{
    static char unknown_buf[32];
    const char *msg = NULL;

    switch (err_code) {
      case SKBAG_OK:
        msg = "Success";
        break;
      case SKBAG_ERR_MEMORY:
        msg = "Memory allocation error";
        break;
      case SKBAG_ERR_KEY_NOT_FOUND:
        msg = "No more entries in bag";
        break;
      case SKBAG_ERR_INPUT:
        msg = "Illegal input";
        break;
      case SKBAG_ERR_OP_BOUNDS:
        msg = "Overflow/Underflow in counter";
        break;
      case SKBAG_ERR_OUTPUT:
        msg = "Error writing to stream";
        break;
      case SKBAG_ERR_READ:
        msg = "Error reading from stream";
        break;
    }

    if (NULL == msg) {
        /* not one of the codes handled above */
        snprintf(unknown_buf, sizeof(unknown_buf),
                 "Unknown Error #%d", (int)err_code);
        msg = unknown_buf;
    }
    return msg;
}


/*
 *  status = skBagSubtractFromCounter(bag, key, value_sub);
 *
 *    Subtract '*value_sub' from the counter associated with 'key' in
 *    'bag'.
 *
 *    A key for which bagGetCounterPointer() returns NULL is treated
 *    as having a counter of 0: subtracting 0 succeeds and leaves the
 *    bag untouched; subtracting anything else is an error.
 *
 *    Return SKBAG_ERR_INPUT when any argument is NULL,
 *    SKBAG_ERR_OP_BOUNDS when '*value_sub' is larger than the current
 *    counter (the counter is left unchanged), and SKBAG_OK otherwise.
 */
skBagErr_t skBagSubtractFromCounter(
    skBag_t                *bag,
    const skBagKey_t       *key,
    const skBagCounter_t   *value_sub)
{
    skBagCounter_t *slot;

    if (NULL == bag || NULL == key || NULL == value_sub) {
        return SKBAG_ERR_INPUT;
    }

    slot = bagGetCounterPointer(bag, key);
    if (NULL == slot) {
        /* no stored counter: behave as though its value were 0 */
        return (0 == *value_sub) ? SKBAG_OK : SKBAG_ERR_OP_BOUNDS;
    }

    /* refuse a subtraction that would take the counter below zero */
    if (*value_sub > *slot) {
        return SKBAG_ERR_OP_BOUNDS;
    }

    *slot -= *value_sub;
    return SKBAG_OK;
}


/*
 *  status = skBagWrite(bag, stream_out);
 *
 *    Write 'bag' to 'stream_out'.
 *
 *    The stream's SiLK header is set to file format FT_RWBAG, record
 *    version RWBAG_FILE_VERSION and record length RWBAG_RECORD_LENGTH
 *    and is written first.  Each entry returned by the bag iterator
 *    is then written as the bytes of its key followed by the bytes
 *    of its counter, and finally the stream is flushed.
 *
 *    Return SKBAG_ERR_INPUT when either argument is NULL, and the
 *    error from skBagIteratorCreate() when the iterator cannot be
 *    created.  Return SKBAG_ERR_OUTPUT when writing the header,
 *    writing an entry, or flushing the stream fails; the stream's
 *    error is reported through skAppPrintErr().  An unexpected
 *    iteration error, or an error destroying the iterator, is
 *    returned when no output error occurred.
 *
 *    The iterator is destroyed on every path once it has been
 *    created.
 */
skBagErr_t skBagWrite(
    const skBag_t          *bag,
    skstream_t             *stream_out)
{
    sk_file_header_t *hdr;
    skBagIterator_t *iter = NULL;
    skBagErr_t err, err2;
    skBagKey_t key;
    skBagCounter_t counter;
    int rv;

    if (bag == NULL || stream_out == NULL) {
        return SKBAG_ERR_INPUT;
    }

    err = skBagIteratorCreate(bag, &iter);
    if (err != SKBAG_OK) {
        return err;
    }

    hdr = skStreamGetSilkHeader(stream_out);
    skHeaderSetFileFormat(hdr, FT_RWBAG);
    skHeaderSetRecordVersion(hdr, RWBAG_FILE_VERSION);
    skHeaderSetRecordLength(hdr, RWBAG_RECORD_LENGTH);

    /* output header */
    rv = skStreamWriteSilkHeader(stream_out);
    if (rv) {
        skStreamPrintLastErr(stream_out, rv, &skAppPrintErr);
        err = SKBAG_ERR_OUTPUT;
        goto END;
    }

    /* output key/counter pairs; stop at the first failed or short
     * write, checking the results the same way skBagWriteArray()
     * does */
    while ((err = skBagIteratorNext(iter, &key, &counter)) == SKBAG_OK) {
        if ((skStreamWrite(stream_out, &key, sizeof(skBagKey_t))
             != sizeof(skBagKey_t))
            || (skStreamWrite(stream_out, &counter, sizeof(skBagCounter_t))
                != sizeof(skBagCounter_t)))
        {
            skStreamPrintLastErr(stream_out, -1, &skAppPrintErr);
            err = SKBAG_ERR_OUTPUT;
            goto END;
        }
    }
    if (err == SKBAG_ERR_KEY_NOT_FOUND) {
        /* expected behavior.  Reset 'err' */
        err = SKBAG_OK;
    } else {
        skAppPrintErr("skBagWrite:  error looping");
    }

    rv = skStreamFlush(stream_out);
    if (rv) {
        skStreamPrintLastErr(stream_out, rv, &skAppPrintErr);
        err = SKBAG_ERR_OUTPUT;
    }

  END:
    /* always release the iterator, including on the error paths */
    err2 = skBagIteratorDestroy(iter);
    if (err2 != SKBAG_OK) {
        skAppPrintErr("skBagWrite:  error %u freeing iterator",
                      (unsigned int)err2);
        /* Return err2 unless err already set */
        if (err == SKBAG_OK) {
            err = err2;
        }
    }

    return err;
}


/*
 *  status = skBagWriteArray(counter_data, num_keys, stream_out);
 *
 *    Write the array counter_data[0..(num_keys-1)] to 'stream_out'
 *    as a bag, using each array index as the key for the counter
 *    stored at that index.  Entries whose counter is the null
 *    counter are skipped.
 *
 *    The stream's SiLK header is set to file format FT_RWBAG, record
 *    version RWBAG_FILE_VERSION and record length RWBAG_RECORD_LENGTH
 *    and is written before the entries; the stream is flushed after
 *    the last entry.
 *
 *    Return SKBAG_ERR_INPUT when 'counter_data' or 'stream_out' is
 *    NULL, SKBAG_ERR_OUTPUT when writing the header, writing an
 *    entry, or flushing fails (the stream's error is reported through
 *    skAppPrintErr()), and SKBAG_OK otherwise.
 */
skBagErr_t skBagWriteArray(
    const skBagCounter_t   *counter_data,
    skBagKey_t              num_keys,
    skstream_t             *stream_out)
{
    const skBagCounter_t *cur;
    sk_file_header_t *hdr;
    skBagKey_t key;
    int rv;

    if (NULL == counter_data || NULL == stream_out) {
        return SKBAG_ERR_INPUT;
    }

    /* describe the file as a bag and emit the header */
    hdr = skStreamGetSilkHeader(stream_out);
    skHeaderSetFileFormat(hdr, FT_RWBAG);
    skHeaderSetRecordVersion(hdr, RWBAG_FILE_VERSION);
    skHeaderSetRecordLength(hdr, RWBAG_RECORD_LENGTH);

    rv = skStreamWriteSilkHeader(stream_out);
    if (0 != rv) {
        skStreamPrintLastErr(stream_out, rv, &skAppPrintErr);
        return SKBAG_ERR_OUTPUT;
    }

    /* visit every index; 'cur' points at the counter for 'key' */
    for (key = 0, cur = counter_data; key < num_keys; ++key, ++cur) {
        if (!(SKBAG_COUNTER_IS_NULL(cur))) {
            /* write the key, then its counter; either write coming
             * up short is an output error */
            if ((skStreamWrite(stream_out, &key, sizeof(skBagKey_t))
                 != sizeof(skBagKey_t))
                || (skStreamWrite(stream_out, cur, sizeof(skBagCounter_t))
                    != sizeof(skBagCounter_t)))
            {
                skStreamPrintLastErr(stream_out, -1, &skAppPrintErr);
                return SKBAG_ERR_OUTPUT;
            }
        }
    }

    rv = skStreamFlush(stream_out);
    if (0 != rv) {
        skStreamPrintLastErr(stream_out, rv, &skAppPrintErr);
        return SKBAG_ERR_OUTPUT;
    }

    return SKBAG_OK;
}


/*
 *  status = skBagCopy(&dest, src);
 *
 *    Make a new bag that is a deep copy of src, and set '*dest' to
 *    it.
 *
 *    Return SKBAG_ERR_INPUT when 'dest' or 'src' is NULL.  Return
 *    SKBAG_ERR_MEMORY when an allocation fails; in that case the
 *    partially built copy is released with skBagFree() and '*dest'
 *    is not modified.  Return SKBAG_OK on success.
 *
 *    The copy starts as a struct assignment from '*src', so every
 *    member other than the tree rooted at 'root' is copied by value.
 *    The tree itself is duplicated with an iterative depth-first
 *    walk: src_node[], dst_node[] and offset[] act as an explicit
 *    stack holding, for each level, the block being visited in the
 *    source, the matching block in the copy, and the index of the
 *    next entry to examine in that block.
 */
skBagErr_t skBagCopy(skBag_t **dest, const skBag_t *src)
{
    skBag_t *bag;
    skBagNode_t src_node[MAX_LEVELS];
    skBagNode_t dst_node[MAX_LEVELS];
    skBagKey_t offset[MAX_LEVELS];
    skBagLevel_t level;
    size_t leaf_size;

    /* reject NULL arguments, as the other public functions do */
    if (dest == NULL || src == NULL) {
        return SKBAG_ERR_INPUT;
    }

    /* Allocate new bag structure */
    bag = malloc(sizeof(*bag));
    if (bag == NULL) {
        return SKBAG_ERR_MEMORY;
    }

    /* Create a shallow copy of the bag, then detach it from the
     * source's tree so that skBagFree() on the copy never touches
     * blocks owned by 'src' */
    *bag = *src;
    bag->root.child = NULL;
    leaf_size = SKBAG_GET_LEVEL_BLOCKS(src, src->levels - 1) *
                sizeof(skBagCounter_t);

    /* Handle the deep copy.  At level 0 each root node is treated as
     * a one-entry block, so the walk is finished once the level
     * drops back to 0. */
    level = 0;
    src_node[0].child = (skBagNode_t*)&src->root; /* Discard const */
    dst_node[0].child = &bag->root;
    offset[0] = 0;

    do {                        /* while (level) */
        skBagNode_t from = src_node[level].child[offset[level]];
        skBagNode_t to;

        /* Check to see if there is anything to copy */
        if (level == src->levels - 1 && from.leaf) {
            /* Copy leaf block */
            to.leaf = malloc(leaf_size);
            if (NULL == to.leaf) {
                skBagFree(bag);
                return SKBAG_ERR_MEMORY;
            }
            memcpy(to.leaf, from.leaf, leaf_size);
            dst_node[level].child[offset[level]].leaf = to.leaf;

            /* Move to next offset */
            ++offset[level];
        } else if (from.child) {
            /* Handle non-leaf block.  calloc() leaves every entry of
             * the new block NULL until its subtree is copied. */
            to.child = calloc(SKBAG_GET_LEVEL_BLOCKS(src, level),
                              sizeof(skBagNode_t));
            if (NULL == to.child) {
                skBagFree(bag);
                return SKBAG_ERR_MEMORY;
            }
            dst_node[level].child[offset[level]].child = to.child;

            /* For a child node, increment the level */
            src_node[level + 1] = from;
            dst_node[level + 1] = to;
            ++level;
            offset[level] = 0;
        } else {
            /* If no child node here, go to next offset */
            ++offset[level];
        }

        /* Check for level decrement (end of block).  The block being
         * scanned at 'level' was sized for level - 1, and several
         * levels may finish at once. */
        while (level &&
               offset[level] >= SKBAG_GET_LEVEL_BLOCKS(src, level - 1))
        {
            --level;
            ++offset[level];
        }
    } while (level);

    *dest = bag;

    return SKBAG_OK;
}


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
