/*
** Copyright (C) 2004-2012 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/

/*
**    rwbagcat reads a binary bag, converts it to text, and outputs it
**    to stdout.  It can also print various statistics and summary
**    information about the bag.  It attempts to read the bag(s) from
**    stdin or from any arguments.
**
*/

#include <silk/silk.h>

RCSIDENT("$SiLK: rwbagcat.c 372a8bc31d8a 2012-02-10 21:55:28Z mthomas $");

#include <silk/bagtree.h>
#include <silk/skipset.h>
#include <silk/iptree.h>
#include <silk/utils.h>
#include <silk/skprintnets.h>
#include <silk/skstream.h>


/* LOCAL DEFINES AND TYPEDEFS */

/* where to write --help output; used by appUsageLong() */
#define USAGE_FH stdout

/* width of count fields in columnar output; used for both the label
 * and counter columns of the inverted-bag output and passed to
 * netStructureSetCountWidth() */
#define COUNT_WIDTH 20

/* return TRUE if a record's 'key' and 'counter' values are within the
 * global limits (minkey..maxkey and mincounter..maxcounter, all
 * inclusive) and if the key is in the global 'mask_set' if specified.
 *
 * NOTE: 'k' may be evaluated up to three times and 'c' up to twice,
 * so do not pass expressions that have side effects. */
#define IS_RECORD_WITHIN_LIMITS(k, c)                   \
    (((k) >= minkey) && ((k) <= maxkey)                 \
     && ((c) >= mincounter) && ((c) <= maxcounter)      \
     && ((NULL == mask_set) || isRecordInMaskSet(k)))

/* how --bin-ips groups counters when the bag is inverted;
 * BINSCHEME_NONE means the switch was not given */
typedef enum bin_scheme_en {
    /* do not invert the bag */
    BINSCHEME_NONE=0,
    /* bin is the counter itself, capped at UINT32_MAX */
    BINSCHEME_LINEAR=1,
    /* bin is skIntegerLog2() of the counter */
    BINSCHEME_BINARY=2,
    /* bin is the counter itself when below 100; otherwise it is
     * floor((log10(counter) - 1) * 100) */
    BINSCHEME_DECIMAL=3
} bin_scheme_t;


/* LOCAL VARIABLES */

/* global I/O state */
static skstream_t **inputs = NULL; /* NULL-terminated array of all input
                                    * files/streams/pipes; allocated in
                                    * appSetup(), filled by addInput() */
/* where the textual bag is written; set by --output-path, otherwise
 * appSetup() opens "stdout" */
static skstream_t *output;
/* where --stats output goes; may be the same stream as 'output' */
static skstream_t *stats;
/* where --tree-stats output goes; may be the same stream as 'output' */
static skstream_t *treestats;
/* non-zero when --stats was given */
static int print_stats = 0;
/* non-zero when --tree-stats was given */
static int print_treestats = 0;
/* non-zero when --network-structure was given, or when appSetup()
 * selects it as the default output mode */
static int print_network = 0;
/* binning requested by --bin-ips; BINSCHEME_NONE if not requested */
static bin_scheme_t bin_scheme = BINSCHEME_NONE;
/* argument to --network-structure (NULL when the switch is given
 * without an argument); appSetup() uses "H" for the default mode */
static const char *net_structure = NULL;

/* delimiter between output columns for hosts/counts */
static char output_delimiter = '|';

/* whether key/counter output is in columns (0) or scrunched together (1) */
static int no_columns = 0;

/* whether to suppress the final delimiter; default no (i.e. end with '|') */
static int no_final_delimiter = 0;

/* whether to print keys as integers or IP addresses */
static int print_integer_keys = 0;

/* zero pad ips */
static int zero_pad_ips = 0;

/* print out keys whose counter is zero (appSetup() requires that
 * either mask_set is given or that minkey and maxkey are both set) */
static int print_zero_counts = 0;

/* initialize these to their opposites to know when they have been set
 * by the user; appSetup() replaces any value still holding its
 * sentinel with the true minimum/maximum */
static skBagCounter_t mincounter = SKBAG_COUNTER_MAX;
static skBagCounter_t maxcounter = SKBAG_COUNTER_MIN;

static skBagKey_t minkey = SKBAG_KEY_MAX;
static skBagKey_t maxkey = SKBAG_KEY_MIN;

/* only print keys that appear in this set; loaded by --mask-set */
static skipset_t *mask_set = NULL;

/* name of program to run to page output; set by --pager and handed
 * to skStreamPageOutput() */
static char *pager = NULL;


/* OPTIONS SETUP */

/* Identifiers for the command line switches.  Each value is used as
 * an index into appOptions[] (for example to fetch a switch's name
 * for an error message), so the order here must match the order of
 * the entries in appOptions[] and appHelp[]. */
typedef enum {
    OPT_NETWORK_STRUCTURE,
    OPT_BIN_IPS,
    OPT_STATS,
    OPT_TREE_STATS,
    OPT_MINCOUNTER,
    OPT_MAXCOUNTER,
    OPT_MINKEY,
    OPT_MAXKEY,
    OPT_MASK_SET,
    OPT_ZERO_COUNTS,
    OPT_OUTPUT_PATH,
    OPT_INTEGER_KEYS,
    OPT_ZERO_PAD_IPS,
    OPT_NO_COLUMNS,
    OPT_COLUMN_SEPARATOR,
    OPT_NO_FINAL_DELIMITER,
    OPT_DELIMITED,
    OPT_PAGER
} appOptionsEnum;


/* The switches this application registers with skOptionsRegister().
 * Entries must stay in the same order as appOptionsEnum, since the
 * code indexes this array with those values, and in the same order as
 * appHelp[], which supplies the help text for each entry. */
static struct option appOptions[] = {
    {"network-structure",   OPTIONAL_ARG, 0, OPT_NETWORK_STRUCTURE},
    {"bin-ips",             OPTIONAL_ARG, 0, OPT_BIN_IPS},
    {"stats",               OPTIONAL_ARG, 0, OPT_STATS},
    {"tree-stats",          OPTIONAL_ARG, 0, OPT_TREE_STATS},
    {"mincounter",          REQUIRED_ARG, 0, OPT_MINCOUNTER},
    {"maxcounter",          REQUIRED_ARG, 0, OPT_MAXCOUNTER},
    {"minkey",              REQUIRED_ARG, 0, OPT_MINKEY},
    {"maxkey",              REQUIRED_ARG, 0, OPT_MAXKEY},
    {"mask-set",            REQUIRED_ARG, 0, OPT_MASK_SET},
    {"zero-counts",         NO_ARG,       0, OPT_ZERO_COUNTS},
    {"output-path",         REQUIRED_ARG, 0, OPT_OUTPUT_PATH},
    {"integer-keys",        NO_ARG,       0, OPT_INTEGER_KEYS},
    {"zero-pad-ips",        NO_ARG,       0, OPT_ZERO_PAD_IPS},
    {"no-columns",          NO_ARG,       0, OPT_NO_COLUMNS},
    {"column-separator",    REQUIRED_ARG, 0, OPT_COLUMN_SEPARATOR},
    {"no-final-delimiter",  NO_ARG,       0, OPT_NO_FINAL_DELIMITER},
    {"delimited",           OPTIONAL_ARG, 0, OPT_DELIMITED},
    {"pager",               REQUIRED_ARG, 0, OPT_PAGER},
    {0,0,0,0 }              /* sentinel entry */
};


/* Help text for each switch, in the same order as appOptions[]; the
 * final NULL pairs with the sentinel entry of appOptions[] (appSetup()
 * asserts that both arrays have the same number of elements). */
static const char *appHelp[] = {
    ("Print the sum of counters for each specified CIDR\n"
     "\tblock in the comma-separated list of CIDR block sizes and/or letters\n"
     "\t(T=0,A=8,B=16,C=24,X=27,H=32).  If argument contains 'S' or '/', for\n"
     "\teach CIDR block print host counts and number of occupied smaller\n"
     "\tCIDR blocks.  Additional CIDR blocks to summarize can be specified\n"
     "\tby listing them after the '/'. Def. TS/ABCX"),
    ("Invert the bag and count by distinct volume values.\n"
     "\tlinear:    volume => count(IP)\n"
     "\tbinary:    log2(volume) => count(IP)\n"
     "\tdecimal:   variation on log10(volume) => count(IP)"),
    ("Print general statistics about the bag to named stream.\n"
     "\tSend output to optional stream, or to stderr if none given"),
    ("Print bag performance data to named stream.\n"
     "\tSend output to optional stream, or to stdout if none given"),
    ("Output records whose counter is at least VALUE, an integer\n"
     "\tfrom 1 to 18446744073709551615. Def. Records with non-zero counters"),
    ("Output records whose counter is not more than VALUE, an\n"
     "\tinteger from 1 to 18446744073709551615"),
    ("Output records whose key is at least VALUE, an integer from 0\n"
     "\tto 4294967295 or an IP address.  Def. Records with non-zero counters"),
    ("Output records whose key is not more than VALUE, an integer\n"
     "\tor an IP address. Def. Records with non-zero counters"),
    "Output records that appear in this set. Def. All records",
    ("Print keys with a counter of zero. Def. No\n"
     "\t(requires --mask-set or both --minkey and --maxkey)"),
    "Write output to named stream. Def. stdout",
    "Print keys as integers. Def. dotted decimal",
    "Print keys as dotted-decimal with 3 digits per octet",
    "Disable fixed-width columnar output. Def. Columnar",
    "Use specified character between columns. Def. '|'",
    "Suppress column delimiter at end of line. Def. No",
    "Shortcut for --no-columns --no-final-del --column-sep=CHAR",
    "Program to invoke to page output. Def. $SILK_PAGER or $PAGER",
    (char *) NULL
};


/* LOCAL FUNCTION PROTOTYPES */

/* forward declarations for file-local functions that are referenced
 * before their definitions appear */
static void appUsageLong(void);
static void appTeardown(void);
static void appSetup(int argc, char **argv);
static int  appOptionsHandler(clientData cData, int opt_index, char *opt_arg);
static int  addInput(const char* filename);
static void removeInputs(void);
static int  setOutput(const char* filename, skstream_t **stream_out);
static int  isRecordInMaskSet(skBagKey_t key);
static int  printStats(
    skBag_t        *bag,
    skstream_t     *s_out);


/* FUNCTION DEFINITIONS */

/*
 *  appUsageLong();
 *
 *    Print complete usage information to USAGE_FH by handing the
 *    summary in USAGE_MSG, the switches in appOptions[], and the
 *    help strings in appHelp[] to skAppStandardUsage().
 *
 *    appSetup() passes this function to skOptionsSetUsageCallback();
 *    skOptionsParse() will call this function and then exit the
 *    program when the --help option is given.
 */
static void appUsageLong(void)
{
#define USAGE_MSG                                            \
    ("[SWITCHES] [BAG_FILES]\n"                              \
     "\tPrint binary Bag files as text.\n")

    skAppStandardUsage(USAGE_FH, USAGE_MSG, appOptions, appHelp);
}


/*
 *  appTeardown()
 *
 *    Release the application's state: destroy every input stream,
 *    destroy the output, stats, and tree-stats streams, and
 *    unregister the application.
 *
 *    A static flag makes every call after the first a no-op, so the
 *    function may be called directly and again via atexit().
 */
static void appTeardown(void)
{
    static int already_run = 0;

    if (!already_run) {
        already_run = 1;

        /* close the input streams and free the array holding them */
        removeInputs();

        /* 'treestats' and 'stats' may be aliases for 'output'; an
         * alias must not be destroyed here since 'output' itself is
         * destroyed below */
        if (output != treestats) {
            skStreamDestroy(&treestats);
        }
        if (output != stats) {
            skStreamDestroy(&stats);
        }
        skStreamDestroy(&output);

        /* clear any alias that still holds the old value of 'output' */
        stats = NULL;
        treestats = NULL;

        skAppUnregister();
    }
}


/*
 *  appSetup(argc, argv);
 *
 *    Perform all the setup for this application: register the
 *    application and its options, parse the command line, validate
 *    the combination of switches, resolve the key/counter limits,
 *    open the input streams and the output/stats/tree-stats streams,
 *    and register appTeardown() with atexit().  This function should
 *    be passed the same arguments that were passed into main().
 *
 *    Returns to the caller if all setup succeeds.  If anything fails,
 *    this function will cause the application to exit with a FAILURE
 *    exit status.
 */
static void appSetup(int argc, char **argv)
{
    int arg_index;
    int using_pager = 0;

    /* verify same number of options and help strings */
    assert((sizeof(appHelp)/sizeof(char *)) ==
            (sizeof(appOptions)/sizeof(struct option)));

    /* register the application */
    skAppRegister(argv[0]);
    skOptionsSetUsageCallback(&appUsageLong);

    /* default output streams */
    output = NULL;
    stats = NULL;
    treestats = NULL;

    /* register the options */
    if (skOptionsRegister(appOptions, &appOptionsHandler, NULL))
    {
        skAppPrintErr("Unable to register options");
        exit(EXIT_FAILURE);
    }

    /* parse options */
    arg_index = skOptionsParse(argc, argv);
    assert(arg_index <= argc);
    if (arg_index < 0) {
        /* options parsing should print error */
        skAppUsage();           /* never returns */
    }

    /* the two output modes are mutually exclusive */
    if (print_network == 1 && bin_scheme != BINSCHEME_NONE) {
        skAppPrintErr("Cannot have both --%s and --%s",
                      appOptions[OPT_NETWORK_STRUCTURE].name,
                      appOptions[OPT_BIN_IPS].name);
        skAppUsage();           /* never returns */
    }

    /* default to printing network hosts */
    if (!print_stats && !print_treestats && !print_network
        && (bin_scheme == BINSCHEME_NONE))
    {
        print_network = 1;
        net_structure = "H";
    }

    /* if neither mask-set nor minkey and maxkey are set, error if
     * zero-count printing requested.  This check must run before the
     * sentinel values are replaced below. */
    if (print_zero_counts && (mask_set == NULL)
        && (minkey == SKBAG_KEY_MAX || maxkey == SKBAG_KEY_MIN))
    {
        skAppPrintErr("To use --%s, either --%s or both --%s and --%s"
                      " must be specified",
                      appOptions[OPT_ZERO_COUNTS].name,
                      appOptions[OPT_MASK_SET].name,
                      appOptions[OPT_MINKEY].name,
                      appOptions[OPT_MAXKEY].name);
        skAppUsage();           /* never returns */
    }

    /* set the minima and maxima: any limit that still holds its
     * "unset" sentinel is opened up to the full range */
    if (mincounter == SKBAG_COUNTER_MAX) {
        mincounter = SKBAG_COUNTER_MIN;
    }
    if (maxcounter == SKBAG_COUNTER_MIN) {
        maxcounter = SKBAG_COUNTER_MAX;
    }
    if (minkey == SKBAG_KEY_MAX) {
        minkey = SKBAG_KEY_MIN;
    }
    if (maxkey == SKBAG_KEY_MIN) {
        maxkey = SKBAG_KEY_MAX;
    }

    /* error if a minimum is greater than a maximum */
    if (mincounter > maxcounter) {
        skAppPrintErr(("Minimum counter greater than maximum: "
                       "%" PRIu64 " > %" PRIu64),
                      mincounter, maxcounter);
        exit(EXIT_FAILURE);
    }
    if (minkey > maxkey) {
        skAppPrintErr(("Minimum key greater than maximum: "
                       "%" PRIu32 " > %" PRIu32),
                      minkey, maxkey);
        exit(EXIT_FAILURE);
    }

    /* add one for "stdin" and add one for sentinel */
    inputs = calloc((2 + argc - arg_index), sizeof(skstream_t*));
    if (!inputs) {
        skAppPrintErr("Out of memory");
        exit(EXIT_FAILURE);
    }

    if (arg_index < argc) {
        /* get filenames from command line */
        for ( ; arg_index < argc; ++arg_index) {
            if (addInput(argv[arg_index])) {
                exit(EXIT_FAILURE);
            }
        }
    } else if ( !FILEIsATty(stdin)) {
        /* no files on command line, try to read from stdin */
        if (addInput("stdin")) {
            exit(EXIT_FAILURE);
        }
    } else {
        skAppPrintErr("No files given on command line and"
                      " stdin is connected to a terminal");
        skAppUsage();
    }

    if (inputs[0] == NULL) {
        skAppPrintErr("No inputs to process");
        exit(EXIT_FAILURE);
    }

    /* Set the default output if none was set */
    if (output == NULL) {
        if (setOutput("stdout", &output)) {
            skAppPrintErr("Unable to print to stdout");
            exit(EXIT_FAILURE);
        }
    }

    /* Allow paging of the output, and see if paging is active */
    skStreamPageOutput(output, pager);
    using_pager = (NULL != skStreamGetPager(output));

    /* If stats/treestats was requested but its output stream hasn't
     * been set, set it to stderr/stdout unless stderr/stdout is a
     * terminal and a pager is being used, in which case use the
     * pager. */
    if (print_stats && stats == NULL) {
        if (using_pager && FILEIsATty(stderr)) {
            stats = output;
        } else {
            if (setOutput("stderr", &stats)) {
                skAppPrintErr("Unable to print to stderr");
                exit(EXIT_FAILURE);
            }
        }
    }
    if (print_treestats && treestats == NULL) {
        if (using_pager || (0==strcmp("stdout", skStreamGetPathname(output)))){
            treestats = output;
        } else {
            if (setOutput("stdout", &treestats)) {
                skAppPrintErr("Unable to print to stdout");
                exit(EXIT_FAILURE);
            }
        }
    }

    /* atexit() reports failure with any nonzero value, which is not
     * necessarily negative */
    if (atexit(appTeardown) != 0) {
        skAppPrintErr("Unable to register appTeardown() with atexit()");
        appTeardown();
        exit(EXIT_FAILURE);
    }

    return; /* OK */
}


/*
 *  status = appOptionsHandler(cData, opt_index, opt_arg);
 *
 *    Callback that skOptionsParse() invokes for each switch the user
 *    supplied.  'opt_index' is the appOptionsEnum value stored as the
 *    final member of the switch's entry in appOptions[]; 'opt_arg'
 *    is the switch's argument, which may be NULL for switches that
 *    take an OPTIONAL_ARG.  'cData' is ignored.
 *
 *    The handler records the user's choice in the file's global
 *    variables.  Returns 0 on success.  Returns 1 after printing an
 *    error when the switch is repeated or its argument is invalid;
 *    a non-zero return causes skOptionsParse() to return a negative
 *    value.  The exception is a --mask-set file that cannot be
 *    loaded, which terminates the program directly.
 */
static int appOptionsHandler(
    clientData  UNUSED(cData),
    int         opt_index,
    char       *opt_arg)
{
    skipaddr_t addr;
    uint64_t parsed;
    /* result of parsing a number or an address; a non-zero value
     * after the switch statement produces the generic error */
    int parse_err = 0;

    switch ((appOptionsEnum)opt_index) {
      case OPT_NETWORK_STRUCTURE:
        print_network = 1;
        net_structure = opt_arg;
        break;

      case OPT_BIN_IPS:
        if (NULL == opt_arg) {
            /* no argument: use the linear scheme */
            bin_scheme = BINSCHEME_LINEAR;
        } else if ('\0' == opt_arg[0]) {
            skAppPrintErr("The --%s switch requires an argument",
                          appOptions[opt_index].name);
            return 1;
        } else {
            /* comparing only as many characters as the user typed
             * accepts any leading substring of a scheme name */
            const size_t arg_len = strlen(opt_arg);

            if (0 == strncmp(opt_arg, "linear", arg_len)) {
                bin_scheme = BINSCHEME_LINEAR;
            } else if (0 == strncmp(opt_arg, "binary", arg_len)) {
                bin_scheme = BINSCHEME_BINARY;
            } else if (0 == strncmp(opt_arg, "decimal", arg_len)) {
                bin_scheme = BINSCHEME_DECIMAL;
            } else {
                skAppPrintErr("Illegal bin scheme. "
                              "Should be one of: linear, binary, decimal.");
                return 1;
            }
        }
        break;

      case OPT_STATS:
      case OPT_TREE_STATS:
        {
            /* both switches behave alike; only the stream and the
             * flag they update differ */
            skstream_t **dest;
            int *requested;

            if (OPT_STATS == opt_index) {
                dest = &stats;
                requested = &print_stats;
            } else {
                dest = &treestats;
                requested = &print_treestats;
            }

            if (NULL != opt_arg) {
                if (*dest) {
                    skAppPrintErr("The --%s switch was given multiple times",
                                  appOptions[opt_index].name);
                    return 1;
                }
                if (setOutput(opt_arg, dest)) {
                    skAppPrintErr("Invalid %s '%s'",
                                  appOptions[opt_index].name, opt_arg);
                    return 1;
                }
            }
            *requested = 1;
        }
        break;

      case OPT_MINCOUNTER:
        parse_err = skStringParseUint64(&parsed, opt_arg, 1, 0);
        if (SKUTILS_ERR_MINIMUM == parse_err) {
            skAppPrintErr(("Smallest allowable --%s value is 1.\n"
                           "\tUse --%s to write records whose counters are 0"),
                          appOptions[opt_index].name,
                          appOptions[OPT_ZERO_COUNTS].name);
            return 1;
        }
        if (0 == parse_err) {
            mincounter = (skBagCounter_t)parsed;
        }
        break;

      case OPT_MAXCOUNTER:
        parse_err = skStringParseUint64(&parsed, opt_arg, 1, 0);
        if (0 == parse_err) {
            maxcounter = (skBagCounter_t)parsed;
        }
        break;

      case OPT_MINKEY:
      case OPT_MAXKEY:
        parse_err = skStringParseIP(&addr, opt_arg);
        if (parse_err) {
            break;
        }
#if SK_ENABLE_IPV6
        if (skipaddrIsV6(&addr)) {
            skAppPrintErr("Invalid %s '%s': IPv6 addresses not supported",
                          appOptions[opt_index].name, opt_arg);
            return 1;
        }
#endif /* SK_ENABLE_IPV6 */
        if (OPT_MINKEY == opt_index) {
            minkey = (skBagKey_t)skipaddrGetV4(&addr);
        } else {
            maxkey = (skBagKey_t)skipaddrGetV4(&addr);
        }
        break;

      case OPT_MASK_SET:
        if (mask_set) {
            skAppPrintErr("The --%s switch was given multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        {
            int load_err = skIPSetLoad(&mask_set, opt_arg);

            if (load_err) {
                skAppPrintErr("Unable to read IPset from '%s': %s",
                              opt_arg, skIPSetStrerror(load_err));
                exit(EXIT_FAILURE);
            }
        }
        break;

      case OPT_ZERO_COUNTS:
        print_zero_counts = 1;
        break;

      case OPT_OUTPUT_PATH:
        if (output) {
            skAppPrintErr("The --%s switch was given multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        if (setOutput(opt_arg, &output)) {
            skAppPrintErr("Invalid %s '%s'",
                          appOptions[opt_index].name, opt_arg);
            return 1;
        }
        break;

      case OPT_INTEGER_KEYS:
        print_integer_keys = 1;
        break;

      case OPT_ZERO_PAD_IPS:
        zero_pad_ips = 1;
        break;

      case OPT_NO_COLUMNS:
        no_columns = 1;
        break;

      case OPT_COLUMN_SEPARATOR:
        output_delimiter = opt_arg[0];
        break;

      case OPT_NO_FINAL_DELIMITER:
        no_final_delimiter = 1;
        break;

      case OPT_DELIMITED:
        /* shortcut for --no-columns and --no-final-delimiter, with
         * an optional replacement delimiter */
        no_columns = 1;
        no_final_delimiter = 1;
        if (opt_arg) {
            output_delimiter = opt_arg[0];
        }
        break;

      case OPT_PAGER:
        pager = opt_arg;
        break;
    }

    if (parse_err) {
        skAppPrintErr("Invalid %s '%s': %s",
                      appOptions[opt_index].name, opt_arg,
                      skStringParseStrerror(parse_err));
        return 1;
    }

    return 0;                   /* OK */
}


/*
 *  status = addInput(filename);
 *
 *    Create a stream for reading SiLK content from 'filename', open
 *    it, and store it in the next unused slot of the global 'inputs'
 *    array.  The name "stdin" is accepted only once.
 *
 *    Return 0 on success.  Return -1, after reporting the error, if
 *    "stdin" is requested a second time or if the stream cannot be
 *    created, bound, or opened.  The caller must have allocated
 *    'inputs' with room for every stream it adds.
 */
static int addInput(const char *filename)
{
    static int stdin_claimed = 0;
    static int next_slot = 0;
    skstream_t *stream = NULL;
    int rv;

    assert(inputs);

    if (0 == strcmp(filename, "stdin")) {
        if (stdin_claimed) {
            skAppPrintErr("Can only read from stdin one time");
            return -1;
        }
        stdin_claimed = 1;
    }

    /* each step runs only when every previous step succeeded */
    rv = skStreamCreate(&stream, SK_IO_READ, SK_CONTENT_SILK);
    if (0 == rv) {
        rv = skStreamBind(stream, filename);
    }
    if (0 == rv) {
        rv = skStreamOpen(stream);
    }
    if (0 != rv) {
        skStreamPrintLastErr(stream, rv, &skAppPrintErr);
        skStreamDestroy(&stream);
        return -1;
    }

    inputs[next_slot++] = stream;
    return 0;
}


/*
 *  removeInputs();
 *
 *    Destroy all the streams in the global 'inputs' array.
 */
static void removeInputs(void)
{
    int i;

    if (inputs == NULL) {
        return;
    }

    for (i = 0; inputs[i]; ++i) {
        skStreamDestroy(&inputs[i]);
    }

    free(inputs);
    inputs = NULL;
}


/*
 *  status = setOutput(name, &stream);
 *
 *    Create a stream for writing text, bind it to 'name', open it,
 *    and store it in the location referenced by 'stream'.
 *
 *    Return 0 on success.  Return -1, after printing an error, when
 *    'stream' is NULL, when 'name' is NULL or empty, or when any of
 *    the create/bind/open steps fails; in the last case the
 *    partially constructed stream is destroyed.
 */
static int setOutput(const char *filename, skstream_t **stream)
{
    int rv;

    if (NULL == stream) {
        skAppPrintErr("Not a valid skstream");
        return -1;
    }
    if (NULL == filename || '\0' == *filename) {
        skAppPrintErr("Empty filename");
        return -1;
    }

    /* each step runs only when every previous step succeeded */
    rv = skStreamCreate(stream, SK_IO_WRITE, SK_CONTENT_TEXT);
    if (0 == rv) {
        rv = skStreamBind(*stream, filename);
    }
    if (0 == rv) {
        rv = skStreamOpen(*stream);
    }
    if (0 == rv) {
        return 0;
    }

    skStreamPrintLastErr(*stream, rv, &skAppPrintErr);
    skStreamDestroy(stream);
    return -1;
}


/*
 *  in_set = isRecordInMaskSet(key);
 *
 *    Convert 'key' to an IPv4 skipaddr_t and return the result of
 *    looking up that address in the global 'mask_set'; the result is
 *    non-zero when the address is in the set.  Callers are expected
 *    to check that 'mask_set' is not NULL first, as
 *    IS_RECORD_WITHIN_LIMITS() does.
 */
static int isRecordInMaskSet(skBagKey_t key)
{
    skipaddr_t key_addr;

    skipaddrSetV4(&key_addr, &key);

    return skIPSetCheckAddress(mask_set, &key_addr);
}


/*
 *  status = printInvertedBag(bag);
 *
 *    Write each entry of the inverted bag 'bag' to the global
 *    'output' stream as one line containing a label for the bin, the
 *    delimiter, and the bin's counter.  The form of the label depends
 *    on the global 'bin_scheme'; the layout depends on 'no_columns'
 *    and 'no_final_delimiter'.
 *
 *    Return 0 on success, or 1 if the bag iterator cannot be created
 *    or destroyed.
 */
static int printInvertedBag(skBag_t *bag)
{
    char label[64];
    char final_delim[2];
    skBagIterator_t *iter;
    skBagKey_t bin;
    skBagCounter_t tally;

    /* an empty string unless the final delimiter is wanted */
    final_delim[0] = no_final_delimiter ? '\0' : output_delimiter;
    final_delim[1] = '\0';

    if (SKBAG_OK != skBagIteratorCreate(bag, &iter)) {
        return 1;
    }

    while (SKBAG_OK == skBagIteratorNext(iter, &bin, &tally)) {
        switch (bin_scheme) {
          case BINSCHEME_NONE:
            skAbortBadCase(bin_scheme);

          case BINSCHEME_LINEAR:
            /* label is just bin number */
            snprintf(label, sizeof(label), ("%" PRIu32), (uint32_t)bin);
            break;

          case BINSCHEME_BINARY:
            /* label is range of values "2^03 to 2^04-1" */
            snprintf(label, sizeof(label), "2^%02lu to 2^%02lu-1",
                     (unsigned long)bin, (unsigned long)(bin + 1));
            break;

          case BINSCHEME_DECIMAL:
            if (bin >= (skBagKey_t)100) {
                /* label is the midpoint of the counter values that
                 * bagcatInvertBag() maps into this bin */
                double lo;
                double hi;
                double mid;

                lo = ceil(pow(10, (((double) bin / 100.0) + 1.0)));
                hi = floor(pow(10, ((((double) bin + 1.0) / 100.0) + 1.0)));
                mid = floor((lo + hi) / 2.0);
                snprintf(label, sizeof(label), "%.0f", mid);
            } else {
                /* bins below 100 hold exactly one counter value */
                snprintf(label, sizeof(label), ("%" PRIu32), (uint32_t)bin);
            }
            break;
        }

        if (no_columns) {
            skStreamPrint(output, ("%s%c%" PRIu64 "%s\n"),
                          label, output_delimiter, tally, final_delim);
        } else {
            skStreamPrint(output, ("%*s%c%*" PRIu64 "%s\n"),
                          COUNT_WIDTH, label, output_delimiter,
                          COUNT_WIDTH, tally, final_delim);
        }
    }

    return (SKBAG_OK != skBagIteratorDestroy(iter)) ? 1 : 0;
}


/*
 *  status = bagcatInvertBag(bag);
 *
 *    Build a temporary bag whose keys are bins computed from the
 *    counters of 'bag' according to the global 'bin_scheme', and
 *    whose counters are the number of entries of 'bag' that fall
 *    into each bin.  Only entries that satisfy
 *    IS_RECORD_WITHIN_LIMITS() are counted.  The temporary bag is
 *    printed with printInvertedBag() and then freed.
 *
 *    Return 0 on success, or 1 if a bag operation or the printing
 *    fails.
 */
static int bagcatInvertBag(skBag_t *bag)
{
    skBag_t *inverted = NULL;
    skBagIterator_t *iter = NULL;
    skBagKey_t key;
    skBagCounter_t counter;
    int status = 1;

    if (SKBAG_OK != skBagCreate(&inverted)
        || SKBAG_OK != skBagIteratorCreate(bag, &iter))
    {
        goto CLEANUP;
    }

    while (SKBAG_OK == skBagIteratorNext(iter, &key, &counter)) {
        if (IS_RECORD_WITHIN_LIMITS(key, counter)) {
            /* the limits have been checked, so 'key' is reused to
             * hold the bin that this counter belongs to */
            switch (bin_scheme) {
              case BINSCHEME_NONE:
                skAbortBadCase(bin_scheme);

              case BINSCHEME_LINEAR:
                /* counters too large for a key share the last bin */
                if (counter < UINT32_MAX) {
                    key = (skBagKey_t)counter;
                } else {
                    key = (skBagKey_t)UINT32_MAX;
                }
                break;

              case BINSCHEME_BINARY:
                key = skIntegerLog2(counter);
                break;

              case BINSCHEME_DECIMAL:
                if (counter >= (skBagCounter_t)100) {
                    key = (skBagKey_t)floor((log10((double)counter) - 1.0)
                                            * 100.0);
                } else {
                    key = (skBagKey_t)counter;
                }
                break;
            }

            /* add inverted entry to bag */
            if (SKBAG_OK != skBagIncrCounter(inverted, &key)) {
                goto CLEANUP;
            }
        }
    }

    if (0 == printInvertedBag(inverted)) {
        status = 0;
    }

  CLEANUP:
    if (inverted) {
        skBagFree(inverted);
    }
    if (iter) {
        skBagIteratorDestroy(iter);
    }

    return status;
}


/*
 *  status = printNetwork(bag);
 *
 *    Print the keys and counters of 'bag' by feeding them, in key
 *    order, to a netStruct_t that is configured from the global
 *    settings (net_structure, output, output_delimiter, no_columns,
 *    no_final_delimiter, zero_pad_ips, print_integer_keys).
 *
 *    The keys that are visited depend on the switches:
 *
 *    - zero counts requested and a mask set given: every address in
 *      'mask_set', clipped to minkey..maxkey;
 *    - zero counts requested, no mask set: every key from minkey to
 *      maxkey;
 *    - otherwise: only the entries present in 'bag' that satisfy
 *      IS_RECORD_WITHIN_LIMITS().
 *
 *    In the two zero-count modes a key that is not in the bag is
 *    printed with a counter of 0, and only 'maxcounter' is applied to
 *    the counters.
 *
 *    Return 1 if the netStruct_t cannot be created or configured, or
 *    if the bag iterator cannot be created; return 0 otherwise.
 *    Errors reading individual keys are reported but do not change
 *    the return value.  The netStruct_t is destroyed on every path
 *    after it has been created.
 */
static int printNetwork(skBag_t *bag)
{
    skBagKey_t key;
    skBagCounter_t counter;
    skBagIterator_t *iter;
    skBagErr_t rv;
    skBagKey_t cur_key;
    netStruct_t *ns;

    /* Set up the netStruct */
    if (netStructureCreate(&ns, 1)) {
        skAppPrintErr("Error creating network-structure");
        return 1;
    }
    netStructureSetCountWidth(ns, COUNT_WIDTH);
    if (netStructureParse(ns, net_structure)) {
        /* do not leak the netStruct on a bad --network-structure */
        netStructureDestroy(&ns);
        return 1;
    }
    netStructureSetOutputStream(ns, output);
    netStructureSetDelimiter(ns, output_delimiter);
    if (no_columns) {
        netStructureSetNoColumns(ns);
    }
    if (no_final_delimiter) {
        netStructureSetNoFinalDelimiter(ns);
    }
    /* zero-padding takes precedence when both formats are requested */
    if (zero_pad_ips) {
        netStructureSetIpFormat(ns, SKIPADDR_ZEROPAD);
    } else if (print_integer_keys) {
        netStructureSetIpFormat(ns, SKIPADDR_DECIMAL);
    }

    if (print_zero_counts && mask_set) {
        /*
         * loop over the items in the set
         */
        skipset_iterator_t s_iter;
        skipaddr_t ipaddr;
        skipaddr_t end_addr;
        uint32_t cidr;
        skBagKey_t end_key;

        skIPSetIteratorBind(&s_iter, mask_set, 1, SK_IPV6POLICY_IGNORE);
        while (skIPSetIteratorNext(&s_iter, &ipaddr, &cidr) == SK_ITERATOR_OK){
            /* convert the CIDR block to its first and last address */
            skCIDR2IPRange(&ipaddr, cidr, &ipaddr, &end_addr);
            cur_key = skipaddrGetV4(&ipaddr);
            if (cur_key > maxkey) {
                /* stop at the first block that starts beyond maxkey;
                 * presumably the iterator visits blocks in ascending
                 * order -- confirm against the IPset API */
                break;
            }
            end_key = skipaddrGetV4(&end_addr);
            if (end_key < minkey) {
                continue;
            }
            /* clip the block to the requested key range */
            if (cur_key < minkey) {
                cur_key = minkey;
            }
            if (end_key > maxkey) {
                end_key = maxkey;
            }
            do {
                rv = skBagGetCounter(bag, &cur_key, &counter);
                switch (rv) {
                  case SKBAG_ERR_KEY_NOT_FOUND:
                    counter = 0;
                    /* FALLTHROUGH */
                  case SKBAG_OK:
                    if (counter <= maxcounter) {
                        netStructurePrintIP((uint32_t)cur_key,
                                            (uint64_t*)&counter,
                                            ns);
                    }
                    break;
                  default:
                    skAppPrintErr("Error reading key %" PRIu32 " from bag: %s",
                                  (uint32_t)cur_key, skBagStrerror(rv));
                }
                /* compare before incrementing so that the loop ends
                 * when end_key is the largest possible key */
            } while (cur_key++ < end_key);
        }
        netStructurePrintFinalize(ns);

    } else if (print_zero_counts) {
        /*
         * if we are to print zero counts, then we just loop through
         * all values, printing them.
         */

        for (cur_key = minkey; cur_key <= maxkey; ++cur_key) {
            rv = skBagGetCounter(bag, &cur_key, &counter);
            switch (rv) {
              case SKBAG_ERR_KEY_NOT_FOUND:
                counter = 0;
                /* FALLTHROUGH */
              case SKBAG_OK:
                if (counter <= maxcounter) {
                    netStructurePrintIP((uint32_t)cur_key, (uint64_t*)&counter,
                                        ns);
                }
                break;

              default:
                skAppPrintErr("Error reading key %" PRIu32 " from bag: %s",
                              (uint32_t)cur_key, skBagStrerror(rv));
            }

            /* handle potential roller: incrementing the largest key
             * would wrap to 0 and the loop would never end */
            if (cur_key == SKBAG_KEY_MAX) {
                break;
            }
        }
        netStructurePrintFinalize(ns);

    } else {
        /*
         * otherwise, only pull the values we will actually print
         */
        if (skBagIteratorCreate(bag, &iter) != SKBAG_OK) {
            /* do not leak the netStruct when the bag cannot be read */
            netStructureDestroy(&ns);
            return 1;
        }

        while (SKBAG_OK == (rv = skBagIteratorNext(iter, &key, &counter))) {
            /* Skip if key or counter is not in range */
            if ( !IS_RECORD_WITHIN_LIMITS(key, counter)) {
                continue;
            }

            netStructurePrintIP((uint32_t)key, (uint64_t*)&counter, ns);
        }

        if (rv == SKBAG_ERR_KEY_NOT_FOUND) {
            /* Run one more time in order to close blocks and print
             * the final footers. */
            netStructurePrintFinalize(ns);
        } else {
            /* unexpected error from bag */
            skAppPrintErr("Error reading from bag: %s",
                          skBagStrerror(rv));
        }

        skBagIteratorDestroy(iter);
    }

    netStructureDestroy(&ns);

    return 0;
}


/*
 *  status = printStats(bag, stream_out);
 *
 *    Walk every entry of 'bag' that satisfies
 *    IS_RECORD_WITHIN_LIMITS() and print summary statistics about
 *    those entries to 'stream_out': the number of keys, the sum of
 *    the counters, the minimum and maximum key, the minimum and
 *    maximum counter, and the mean, variance, standard deviation,
 *    skew, and kurtosis of the counters.
 *
 *    The keys are rendered as integers when 'print_integer_keys' is
 *    set, as zero-padded dotted quads when 'zero_pad_ips' is set,
 *    and as plain dotted quads otherwise.
 *
 *    When no entry is within the limits, only a "No entries" notice
 *    is printed.  Statistics that are mathematically undefined (the
 *    variance of a single entry; the skew and kurtosis when every
 *    counter is identical) are reported as 0 instead of NaN/Inf.
 *
 *    Return 0 on success, or 1 when the bag iterator cannot be
 *    created or destroyed.
 */
static int printStats(
    skBag_t        *bag,
    skstream_t     *stream_out)
{
    double counter_temp =  0.0;
    double counter_mult =  0.0;
    double sum =  0.0; /* straight sum */
    double sum2 = 0.0; /* sum of squares */
    double sum3 = 0.0; /* sum of cubes */
    double sum4 = 0.0; /* sum of fourth powers */

    double key_count = 0.0;
    double mean = 0.0;
    double stddev = 0.0;
    double temp = 0.0;
    double moment4 = 0.0;
    double variance = 0.0;
    double skew = 0.0;
    double kurtosis = 0.0;

    skBagIterator_t *iter;
    skBagKey_t key;
    skBagCounter_t counter;

    skBagKey_t min_seen_key, max_seen_key;
    skBagCounter_t min_seen_counter, max_seen_counter;
    char min_seen_key_st[SK_NUM2DOT_STRLEN];
    char max_seen_key_st[SK_NUM2DOT_STRLEN];

    assert(bag != NULL);
    assert(stream_out != NULL);
    min_seen_key = max_seen_key = SKBAG_KEY_MIN;
    min_seen_counter = max_seen_counter = SKBAG_COUNTER_MAX;

    if (skBagIteratorCreate(bag, &iter) != SKBAG_OK) {
        return 1;
    }

    while (skBagIteratorNext(iter, &key, &counter) == SKBAG_OK) {
        if ( !IS_RECORD_WITHIN_LIMITS(key, counter)) {
            continue;
        }

        if (key_count < 1.0) {
            /* first entry */
            min_seen_key = max_seen_key = key;
            min_seen_counter = max_seen_counter = counter;
        } else {
            /* compare explicitly so the result does not depend on
             * the order in which the iterator visits the keys */
            if (key < min_seen_key) {
                min_seen_key = key;
            } else if (key > max_seen_key) {
                max_seen_key = key;
            }
            if (counter < min_seen_counter) {
                min_seen_counter = counter;
            } else if (counter > max_seen_counter) {
                max_seen_counter = counter;
            }
        }

        /* straight sum */
        counter_temp = (double) counter;
        sum += counter_temp;

        /* sum of squares */
        counter_mult = counter_temp * counter_temp;
        sum2 += counter_mult;

        /* sum of cubes */
        counter_mult *= counter_temp;
        sum3 += counter_mult;

        /* sum of fourth powers */
        counter_mult *= counter_temp;
        sum4 += counter_mult;

        ++key_count;
    }

    if (skBagIteratorDestroy(iter) != SKBAG_OK) {
        return 1;
    }


    skStreamPrint(stream_out, "\nStatistics\n");

    if (key_count < 1.0) {
        skStreamPrint(stream_out, "  No entries in bag.\n");
        return 0;
    }

    if (print_integer_keys) {
        snprintf(min_seen_key_st, sizeof(min_seen_key_st), ("%" PRIu32),
                 (uint32_t)min_seen_key);
        snprintf(max_seen_key_st, sizeof(max_seen_key_st), ("%" PRIu32),
                 (uint32_t)max_seen_key);
    } else if (zero_pad_ips) {
        num2dot0_r(min_seen_key, min_seen_key_st);
        num2dot0_r(max_seen_key, max_seen_key_st);
    } else {
        num2dot_r(min_seen_key, min_seen_key_st);
        num2dot_r(max_seen_key, max_seen_key_st);
    }

    /* formulae derived from HyperStat Online - David M. Lane */

    /* http://davidmlane.com/hyperstat/A15885.html (mean) */
    mean = sum / key_count;

    /* The sample variance divides by (key_count - 1), so it is only
     * defined when there are at least two entries; with one entry
     * the variance, standard deviation, skew, and kurtosis all stay
     * at 0. */
    if (key_count >= 2.0) {
        /* http://davidmlane.com/hyperstat/A16252.html (variance) */

        /* sum of squared deviations from the mean, expanded in terms
         * of the raw power sums */
        temp = sum2
               - (2.0 * mean * sum)
               + (key_count * mean * mean);
        if (temp < 0.0) {
            /* rounding in the expansion above can leave a tiny
             * negative value; clamp so sqrt() does not yield NaN */
            temp = 0.0;
        }

        variance = temp / (key_count - 1.0);

        /* http://davidmlane.com/hyperstat/A16252.html (std deviation) */
        stddev = sqrt(variance);

        /* skew and kurtosis divide by a power of the standard
         * deviation; they are undefined when all counters are
         * equal, so leave them at 0 in that case */
        if (variance > 0.0) {
            /* http://davidmlane.com/hyperstat/A11284.html (skew) */
            skew = ((sum3
                       - (3.0 * mean * sum2)
                       + (3.0 * mean * mean * sum)
                       - (key_count * mean * mean * mean))
                     / (key_count * variance * stddev));

            /* http://davidmlane.com/hyperstat/A53638.html (kurtosis) */

            /* sum of the fourth powers of the deviations from the
             * mean, expanded in terms of the raw power sums */
            moment4 = sum4
                      - (4.0 * mean * sum3)
                      + (6.0 * mean * mean * sum2)
                      - (4.0 * mean * mean * mean * sum)
                      + (key_count * mean * mean * mean * mean);

            kurtosis = (moment4 / (key_count * variance * variance)) - 3.0;
        }
    }

    skStreamPrint(stream_out, ("%18s:  %" PRIu64 "\n%18s:  %" PRIu64 "\n"
                               "%18s:  %s\n%18s:  %s\n"
                               "%18s:  %" PRIu64 "\n%18s:  %" PRIu64 "\n"
                               "%18s:  %.4g\n%18s:  %.4g\n%18s:  %.4g\n"
                               "%18s:  %.4g\n%18s:  %.4g\n"),
                  "keys",               (uint64_t)key_count,
                  "sum of counters",    (uint64_t)sum,
                  "minimum key",        min_seen_key_st,
                  "maximum key",        max_seen_key_st,
                  "minimum counter",    (uint64_t)min_seen_counter,
                  "maximum counter",    (uint64_t)max_seen_counter,
                  "mean",               mean,
                  "variance",           variance,
                  "standard deviation", stddev,
                  "skew",               skew,
                  "kurtosis",           kurtosis);

    return 0;
}


/*
 * Output bag using current state of options.
 *
 * Depending on the option variables, this prints the network
 * structure of 'bag' (print_network), hands the bag to
 * bagcatInvertBag() (bin_scheme other than BINSCHEME_NONE), prints
 * statistics about the bag to 'stats' (print_stats), and prints the
 * tree statistics to 'treestats' (print_treestats), in that order.
 *
 * A failure to print the network structure terminates the program.
 * Return 0 on success, or non-zero when the statistics cannot be
 * printed.
 */
static int processBag(skBag_t *bag)
{
    if (print_network != 0) {
        if (printNetwork(bag) != 0) {
            skAppPrintErr("Cannot print network structure");
            exit(EXIT_FAILURE);
        }
    }

    if (bin_scheme != BINSCHEME_NONE) {
        bagcatInvertBag(bag);
    }

    if (print_stats) {
        /* do not swallow a failure; the caller reports which bag
         * could not be processed */
        if (printStats(bag, stats) != 0) {
            skAppPrintErr("Cannot print statistics");
            return 1;
        }
    }

    if (print_treestats) {
        skBagPrintTreeStats(bag, treestats);
    }

    return 0;
}


/*
 * Program entry point: set up the application, then read, process,
 * and free one bag for each entry of the NULL-terminated 'inputs'
 * array.  Any failure along the way prints a message naming the
 * input and exits with EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
    skBag_t *bag = NULL;
    skBagErr_t status;
    int idx = 0;

    /* never returns on error */
    appSetup(argc, argv);

    while (inputs[idx]) {
        /* load the bag from the current input */
        status = skBagRead(&bag, inputs[idx]);
        if (SKBAG_OK != status) {
            skAppPrintErr("Error reading bag from input stream '%s'",
                          skStreamGetPathname(inputs[idx]));
            exit(EXIT_FAILURE);
        }

        /* produce the requested output for this bag */
        if (processBag(bag) != 0) {
            skAppPrintErr("Error processing bag '%s'",
                          skStreamGetPathname(inputs[idx]));
            exit(EXIT_FAILURE);
        }

        /* release the bag before moving to the next input */
        status = skBagFree(bag);
        if (SKBAG_OK != status) {
            skAppPrintErr("Error deallocating bag '%s'",
                          skStreamGetPathname(inputs[idx]));
            exit(EXIT_FAILURE);
        }

        ++idx;
    }

    /* all inputs handled */
    appTeardown();

    return 0;
}


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
