/*
** Copyright (C) 2001-2012 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/

/*
**  rwuniqsetup.c
**
**  Application setup for rwuniq.  See rwuniq.c for a description.
*/

#include <silk/silk.h>

RCSIDENT("$SiLK: rwuniqsetup.c 372a8bc31d8a 2012-02-10 21:55:28Z mthomas $");

#include <silk/sksite.h>
#include <silk/iochecks.h>
#include <silk/skprefixmap.h>
#include <silk/skcountry.h>
#include "rwuniq.h"


/* TYPEDEFS AND DEFINES */

/* file handle where appUsageLong() prints the --help usage message */
#define USAGE_FH stdout

/* file handle where the names of the input files are written when the
 * --print-filenames switch is specified */
#define PRINT_FILENAMES_FH  stderr


/* LOCAL VARIABLES */

/* Information about each potential built-in "value" field the user
 * can choose to compute and display.
 *
 * The order of the entries matters: appOptionsHandler() indexes this
 * array by (opt_index - OPT_BYTES) and asserts that the index equals
 * the entry's vf_id, and parseValueFields() indexes the array by the
 * id of the parsed field.  The entries must therefore stay in the
 * same order as the OPT_BYTES...OPT_DIP_DISTINCT switches.
 *
 * NOTE(review): the value_field_t initializers are positional and the
 * type is declared in rwuniq.h; from the uses in this file they
 * presumably include the title, the minimum and maximum thresholds,
 * the textual column width, and the id --- confirm against the
 * declaration before reordering anything. */
static struct builtin_values_st {
    value_field_t   bv_f;
    /* whether this column is used for --all-counts, 1==yes */
    unsigned        bv_all_counts :1;
    /* whether the user requested this field */
    unsigned        bv_switched_on:1;
} builtin_values[] = {
    {{"Bytes",          NULL,
      0, UINT64_MAX, sizeof(uint64_t), 0, 20, VALUE_BYTES},   1, 0},
    {{"Packets",        NULL,
      0, UINT64_MAX, sizeof(uint32_t), 0, 10, VALUE_PACKETS}, 1, 0},
    {{"Records",        NULL,
      0, UINT64_MAX, sizeof(uint32_t), 0, 10, VALUE_FLOWS},   1, 0},
    {{"sTime-Earliest", NULL,
      0, UINT64_MAX, sizeof(uint32_t), 0, 19, VALUE_STIME},   1, 0},
    {{"eTime-Latest",   NULL,
      0, UINT64_MAX, sizeof(uint32_t), 0, 19, VALUE_ETIME},   1, 0},
    {{"sIP-Distinct",   NULL,
      0, UINT64_MAX, sizeof(uint64_t), 0, 10, DISTINCT_SIP},  0, 0},
    {{"dIP-Distinct",   NULL,
      0, UINT64_MAX, sizeof(uint64_t), 0, 10, DISTINCT_DIP},  0, 0}
};

/* number of entries in the builtin_values[] array */
static const size_t num_builtin_values = (sizeof(builtin_values)/
                                          sizeof(struct builtin_values_st));

/* key fields used when parsing the user's --fields switch */
static sk_stringmap_t *key_field_map = NULL;

/* the text the user entered for the --fields switch */
static const char *fields_arg = NULL;

/* aggregate value fields used when parsing the user's --values switch */
static sk_stringmap_t *value_field_map = NULL;

/* the text the user entered for the --values switch */
static const char *values_arg = NULL;

/* the number of bytes that the distinct counts put into the hash key
 * and into the heap value */
static size_t distinct_bytes_key = 0;
static size_t distinct_bytes_value = 0;

/* name of program to run to page output; set by the --pager switch */
static char *pager;

/* temporary directory; registered with skOptionsTempDirRegister() */
static const char *temp_directory = NULL;

/* the size, in seconds, of the bins into which the sTime and/or eTime
 * are floored; set by the --bin-time switch, 0 if the switch was not
 * given */
static uint32_t time_bin_size = 0;

/* when time-binning is active and all time fields---sTime, eTime,
 * elapsed---are requested, we must adjust 'elapsed' so that it is
 * equal to eTime-sTime.  this is non-0 if we must adjust */
static int adjust_elapsed = 0;

/* bit flags noting which of the time-related key fields were seen */
#define PARSE_KEY_ELAPSED   (1 << 0)
#define PARSE_KEY_STIME     (1 << 1)
#define PARSE_KEY_ETIME     (1 << 2)
#define PARSE_KEY_ALL_TIMES (PARSE_KEY_ELAPSED|PARSE_KEY_STIME|PARSE_KEY_ETIME)

/* which of elapsed, sTime, and eTime were requested. uses above values */
static unsigned int time_fields = 0;

/* input checker; created by iochecksSetup() in appSetup() and
 * destroyed by iochecksTeardown() in appTeardown() */
static iochecksInfoStruct_t *ioISP = NULL;

/* fields that get defined just like plugins; appSetup() passes each
 * name and setup function to skPluginAddAsPlugin() */
static const struct app_static_plugins_st {
    char               *name;
    skplugin_setup_fn_t setup_fn;
} app_static_plugins[] = {
    {"addrtype",        skAddressTypesAddFields},
    {"ccfilter",        skCountryAddFields},
    {"pmapfilter",      skPrefixMapAddFields},
    {NULL, NULL}        /* sentinel */
};

/* plug-ins to attempt to load at startup; appSetup() passes each name
 * to skPluginLoadPlugin() and ignores the result */
static const char *app_plugin_names[] = {
    SK_PLUGIN_ADD_SUFFIX("silkpython"),
    NULL /* sentinel */
};

/* non-zero if we are shutting down due to a signal; controls whether
 * errors are printed in appTeardown().  Set by appHandleSignal(). */
static int caught_signal = 0;


/* OPTIONS */

/* Identifiers for the application's switches.  Each value is used as
 * the final member of the matching entry in appOptions[] and also as
 * the index of that entry (for example, appOptions[OPT_FIELDS].name),
 * so the order here must match the order of appOptions[] and of the
 * help strings in appHelp[]. */
typedef enum {
    OPT_FIELDS,
    OPT_VALUES,
    OPT_PLUGIN,
    OPT_ALL_COUNTS,
    /* OPT_BYTES...OPT_DIP_DISTINCT must be contiguous and appear in
     * same order as in builtin_values[] */
    OPT_BYTES,
    OPT_PACKETS,
    OPT_FLOWS,
    OPT_STIME,
    OPT_ETIME,
    OPT_SIP_DISTINCT,
    OPT_DIP_DISTINCT,
    OPT_PRESORTED_INPUT,
    OPT_SORT_OUTPUT,
    OPT_BIN_TIME,
    OPT_EPOCH_TIME,
    OPT_INTEGER_IPS,
    OPT_ZERO_PAD_IPS,
    OPT_INTEGER_SENSORS,
    OPT_INTEGER_TCP_FLAGS,
    OPT_NO_TITLES,
    OPT_NO_COLUMNS,
    OPT_COLUMN_SEPARATOR,
    OPT_NO_FINAL_DELIMITER,
    OPT_DELIMITED,
    OPT_PRINT_FILENAMES,
    OPT_COPY_INPUT,
    OPT_OUTPUT_PATH,
    OPT_PAGER,
    OPT_LEGACY_TIMESTAMPS
} appOptionsEnum;


/* The switches this application registers with skOptionsRegister().
 * The last member of each entry is the identifier that is handed to
 * appOptionsHandler() as 'opt_index'.  The entries must remain in the
 * same order as the help strings in appHelp[] because appUsageLong()
 * uses a single index for both arrays.  The final "dynamic-library"
 * entry reuses OPT_PLUGIN so that it is handled exactly like
 * --plugin. */
static struct option appOptions[] = {
    {"fields",              REQUIRED_ARG, 0, OPT_FIELDS},
    {"values",              REQUIRED_ARG, 0, OPT_VALUES},
    {"plugin",              REQUIRED_ARG, 0, OPT_PLUGIN},
    {"all-counts",          NO_ARG,       0, OPT_ALL_COUNTS},
    {"bytes",               OPTIONAL_ARG, 0, OPT_BYTES},
    {"packets",             OPTIONAL_ARG, 0, OPT_PACKETS},
    {"flows",               OPTIONAL_ARG, 0, OPT_FLOWS},
    {"stime",               NO_ARG,       0, OPT_STIME},
    {"etime",               NO_ARG,       0, OPT_ETIME},
    {"sip-distinct",        OPTIONAL_ARG, 0, OPT_SIP_DISTINCT},
    {"dip-distinct",        OPTIONAL_ARG, 0, OPT_DIP_DISTINCT},
    {"presorted-input",     NO_ARG,       0, OPT_PRESORTED_INPUT},
    {"sort-output",         NO_ARG,       0, OPT_SORT_OUTPUT},
    {"bin-time",            OPTIONAL_ARG, 0, OPT_BIN_TIME},
    {"epoch-time",          NO_ARG,       0, OPT_EPOCH_TIME},
    {"integer-ips",         NO_ARG,       0, OPT_INTEGER_IPS},
    {"zero-pad-ips",        NO_ARG,       0, OPT_ZERO_PAD_IPS},
    {"integer-sensors",     NO_ARG,       0, OPT_INTEGER_SENSORS},
    {"integer-tcp-flags",   NO_ARG,       0, OPT_INTEGER_TCP_FLAGS},
    {"no-titles",           NO_ARG,       0, OPT_NO_TITLES},
    {"no-columns",          NO_ARG,       0, OPT_NO_COLUMNS},
    {"column-separator",    REQUIRED_ARG, 0, OPT_COLUMN_SEPARATOR},
    {"no-final-delimiter",  NO_ARG,       0, OPT_NO_FINAL_DELIMITER},
    {"delimited",           OPTIONAL_ARG, 0, OPT_DELIMITED},
    {"print-filenames",     NO_ARG,       0, OPT_PRINT_FILENAMES},
    {"copy-input",          REQUIRED_ARG, 0, OPT_COPY_INPUT},
    {"output-path",         REQUIRED_ARG, 0, OPT_OUTPUT_PATH},
    {"pager",               REQUIRED_ARG, 0, OPT_PAGER},
    {"legacy-timestamps",   OPTIONAL_ARG, 0, OPT_LEGACY_TIMESTAMPS},
    {"dynamic-library",     REQUIRED_ARG, 0, OPT_PLUGIN},
    {0,0,0,0}               /* sentinel entry */
};

/* Help text for each switch.  Entry N describes the switch at
 * appOptions[N], so this array must have exactly as many entries
 * (including the trailing NULL) as appOptions[]; appSetup() asserts
 * that.  The first two entries are NULL because appUsageLong()
 * builds the help for --fields and --values from the string maps.
 * The --bin-time text deliberately ends with "Def. No, " because
 * appUsageLong() appends the value of DEFAULT_TIME_BIN to it. */
static const char *appHelp[] = {
    NULL, /* generated dynamically */
    NULL, /* generated dynamically */
    ("Load given plug-in to add fields and/or values. Switch may\n"
     "\tbe repeated to load multiple plug-ins. Def. None"),
    ("Enable the next five switches--count everything.  If no\n"
     "\tcount is specified, flows are counted.  Def. No"),
    ("Sum bytes in each bin; optionally choose to print\n"
     "\tbins whose total is in given range; range is MIN or MIN-MAX. Def. No"),
    ("Sum packets in each bin; optionally choose to print\n"
     "\tbins whose total is in given range; range is MIN or MIN-MAX. Def. No"),
    ("Count flow records in each bin; optionally choose to print\n"
     "\tbins whose count is in given range; range is MIN or MIN-MAX. Def. No"),
    "Print earliest time flow was seen in each bin. Def. No",
    "Print latest time flow was seen  in each bin. Def. No",
    ("Count distinct sIPs in each bin; optionally choose to\n"
     "\tprint bins whose count is in range; range is MIN or MIN-MAX. Def. No"),
    ("Count distinct dIPs in each bin; optionally choose to\n"
     "\tprint bins whose count is in range; range is MIN or MIN-MAX. Def. No"),
    ("Assume input has been presorted using\n"
     "\trwsort invoked with the exact same --fields value. Def. No"),
    ("Present the output in sorted order. Def. No"),
    /* the word "to" used to appear both before and after the line
     * break in the following text */
    ("When using 'sTime' or 'eTime' as a key, adjust time(s) to\n"
     "\tappear in N-second bins (floor of time is used). Def. No, "),
    "Print times in UNIX epoch seconds. Def. No",
    "Print IP numbers as integers. Def. Canonical form",
    "Print IP numbers in zero-padded canonical form. Def. No",
    "Print sensor as an integer. Def. Sensor name",
    "Print TCP Flags as an integer. Def. No",
    "Do not print column titles. Def. Print titles",
    "Disable fixed-width columnar output. Def. Columnar",
    "Use specified character between columns. Def. '|'",
    "Suppress column delimiter at end of line. Def. No",
    "Shortcut for --no-columns --no-final-del --column-sep=CHAR",
    "Print names of input files as they are opened. Def. No",
    "Copy all input SiLK Flows to given pipe or file. Def. No",
    "Send output to given file path. Def. stdout",
    "Program to invoke to page output. Def. $SILK_PAGER or $PAGER",
    ("Timestamp format. Choices:\n"
     "\t0==(new)\"YYYY/MM/DDThh:mm:ss\"; 1==(legacy)\"MM/DD/YYYY hh:mm:ss\".\n"
     "\tDef. 0 when switch not provided; 1 when switch provided with no value"),
    "Deprecated.  Alias for --plugin",
    (char *)NULL
};



/* LOCAL FUNCTION PROTOTYPES */

/* Callbacks handed to the SiLK library in appSetup(): the --help
 * printer, the handler for the switches in appOptions[], and the
 * signal handler. */
static void appUsageLong(void);
static int  appOptionsHandler(clientData cData, int opt_index, char *opt_arg);
static void appHandleSignal(int sig);

/* Create the string maps for --fields and --values, and parse the
 * text the user gave to those switches. */
static int createStringmaps(void);
static int parseKeyFields(const char *field_string);
static int parseValueFields(const char *value_string);

/* Adding key and value fields */
static size_t getFieldBinWidth(rwrec_printable_fields_t field_id);
static int appAddPluginKey(
    key_field_t            *key_field,
    sk_stringmap_entry_t   *map_entry);

/* Sometimes key fields must be modified---e.g., sTime and eTime when
 * the --bin-time switch is given.  These functions do the necessary
 * modification. */
static void recToKeyStime(
    const rwRec    *rwrec,
    void           *bin_value);
static void recToKeyEtime(
    const rwRec    *rwrec,
    void           *bin_value);
static void recToKeyElapsed(
    const rwRec    *rwrec,
    void           *bin_value);
static void keyToRecStime(
    rwRec          *rwrec,
    const void     *bin_value);
static void keyToRecEtime(
    rwRec          *rwrec,
    const void     *bin_value);
static void keyToRecElapsed(
    rwRec          *rwrec,
    const void     *bin_value);


/* FUNCTION DEFINITIONS */

/*
 *  appUsageLong();
 *
 *    Print complete usage information to USAGE_FH.  Pass this
 *    function to skOptionsSetUsageCallback(); skOptionsParse() will
 *    call this funciton and then exit the program when the --help
 *    option is given.
 */
static void appUsageLong(void)
{
    FILE *fh = USAGE_FH;
    int i;

#define USAGE_MSG                                                             \
    ("--fields=N [SWITCHES] [FILES]\n"                                        \
     "\tSummarize SiLK Flow records into user-defined keyed bins specified\n" \
     "\twith the --fields switch.  For each keyed bin, print byte, packet,\n" \
     "\tand/or flow counts and/or the time window when key was active.\n"     \
     "\tWhen no files are given on command line, flows are read from STDIN.\n")

    /* Create the string maps for --fields and --values */
    createStringmaps();

    fprintf(fh, "%s %s", skAppName(), USAGE_MSG);

    fprintf(fh, "\nSWITCHES:\n");
    skOptionsDefaultUsage(fh);
    for (i = 0; appOptions[i].name; i++) {
        fprintf(fh, "--%s %s. ", appOptions[i].name,
                SK_OPTION_HAS_ARG(appOptions[i]));
        switch ((appOptionsEnum)i) {
          case OPT_FIELDS:
            /* Dynamically build the help */
            fprintf(fh, "Field(s) to use as key as a comma separated list:\n");
            skStringMapPrintUsage(key_field_map, fh, 4);
            break;
          case OPT_VALUES:
            fprintf(fh, "Value(s) to compute, a comma separated list:\n");
            skStringMapPrintUsage(value_field_map, fh, 4);
            break;
          case OPT_BIN_TIME:
            fprintf(fh, "%s%d\n", appHelp[i], DEFAULT_TIME_BIN);
            break;
          default:
            /* Simple help text from the appHelp array */
            fprintf(fh, "%s\n", appHelp[i]);
            break;
        }
    }

    skIPv6PolicyUsage(fh);
    skOptionsTempDirUsage(fh);
    sksiteOptionsUsage(fh);
    skPluginOptionsUsage(fh);
}


/*
 *  appSetup(argc, argv);
 *
 *    Perform all the setup for this application, including setting up
 *    required modules, parsing options, etc.  This function should be
 *    passed the same arguments that were passed into main().
 *
 *    In order, the function: registers the application and its
 *    switches; adds the built-in and default plug-ins; parses the
 *    command line; creates and configures the ascii stream; parses
 *    the --fields and --values switches; computes the sizes of the
 *    hash key and hash node; checks the inputs; registers
 *    appTeardown() with atexit(); starts the pager; opens the
 *    --copy-input destination; and installs the signal handler.
 *
 *    Returns to the caller if all setup succeeds.  If anything fails,
 *    this function will cause the application to exit with a FAILURE
 *    exit status.
 */
void appSetup(int argc, char **argv)
{
    int using_pager = 0;
    size_t i;
    int j;

    /* verify same number of options and help strings */
    assert((sizeof(appHelp)/sizeof(char *)) ==
           (sizeof(appOptions)/sizeof(struct option)));

    /* register the application */
    skAppRegister(argv[0]);
    skOptionsSetUsageCallback(&appUsageLong);

    /* initialize globals */
    output.pathname = "stdout";
    output.fp = stdout;
    value_byte_width = 0;
    value_num_fields = 0;
    memset(&app_flags, 0, sizeof(app_flags));

    /* initialize plugin library */
    skPluginSetup(2, SKPLUGIN_APP_UNIQ_FIELD, SKPLUGIN_APP_UNIQ_VALUE);

    /* create the input checker; it records the --copy-input
     * destination and is used below to locate the input files */
    ioISP = iochecksSetup(1, 0, argc, argv);

    /* register the options */
    if (skOptionsRegister(appOptions, &appOptionsHandler, NULL)
        || skOptionsTempDirRegister(&temp_directory)
        || skIPv6PolicyOptionsRegister(&ipv6_policy)
        || sksiteOptionsRegister(SK_SITE_FLAG_CONFIG_FILE))
    {
        skAppPrintErr("Unable to register options");
        appExit(EXIT_FAILURE);
    }

    /* try to load hard-coded plugins */
    for (j = 0; app_static_plugins[j].name; ++j) {
        skPluginAddAsPlugin(app_static_plugins[j].name,
                            app_static_plugins[j].setup_fn);
    }
    for (j = 0; app_plugin_names[j]; ++j) {
        skPluginLoadPlugin(app_plugin_names[j], 0);
    }

    /* parse options */
    ioISP->firstFile = skOptionsParse(argc, argv);
    if (ioISP->firstFile < 0) {
        skAppUsage();/* never returns */
    }

    /* try to load site config file; if it fails, we will not be able
     * to resolve flowtype and sensor from input file names, but we
     * should not consider it a complete failure */
    sksiteConfigure(0);

    /* create the ascii stream and set its properties */
    if (rwAsciiStreamCreate(&ascii_str)) {
        skAppPrintErr("Unable to create ascii stream");
        appExit(EXIT_FAILURE);
    }
    rwAsciiSetNoNewline(ascii_str);
    rwAsciiSetNoFinalDelimiter(ascii_str);
    rwAsciiSetDelimiter(ascii_str, delimiter);
    rwAsciiSetIPv6Policy(ascii_str, ipv6_policy);
    rwAsciiSetTimestampFlags(ascii_str, time_flags);
    if (app_flags.no_titles) {
        rwAsciiSetNoTitles(ascii_str);
    }
    if (app_flags.no_columns) {
        rwAsciiSetNoColumns(ascii_str);
    }
    if (app_flags.integer_sensors) {
        rwAsciiSetIntegerSensors(ascii_str);
    }
    if (app_flags.integer_tcp_flags) {
        rwAsciiSetIntegerTcpFlags(ascii_str);
    }

    /* conflicting properties */
    if (app_flags.zero_pad_ips && !app_flags.integer_ips) {
        rwAsciiSetZeroPadIps(ascii_str);
    }
    if (app_flags.integer_ips) {
        if (app_flags.zero_pad_ips) {
            skAppPrintErr("--integer-ips option overrides"
                          " --zero-pad-ips\n"
                          "\tWill print IPs as integers.");
        }
        rwAsciiSetIntegerIps(ascii_str);
    }

    /* verify that the temp directory is valid */
    if (skTempFileInitialize(temp_directory, &skAppPrintErr)) {
        appExit(EXIT_FAILURE);
    }

    /* set up the key_field_map and value_field_map */
    if (createStringmaps()) {
        appExit(EXIT_FAILURE);
    }

    /* make sure the user specified at least one key field */
    if (fields_arg == NULL || fields_arg[0] == '\0') {
        skAppPrintErr("The --%s switch is required",
                      appOptions[OPT_FIELDS].name);
        skAppUsage();         /* never returns */
    }

    /* parse the --fields and --values switches */
    if (parseKeyFields(fields_arg)) {
        appExit(EXIT_FAILURE);
    }
    if (parseValueFields(values_arg)) {
        appExit(EXIT_FAILURE);
    }

    /* size of key in the hash is the key and any distinct fields */
    hash_key_bytes = key_byte_width + distinct_bytes_key;

    /* size of the nodes in the hash table */
    hash_node_bytes = hash_key_bytes + hash_value_bytes;

#if SK_ENABLE_IPV6
    /* we use IPSets to count distinct sIP and dIP for presorted input
     * and to count multiple distinct columns for the hash
     * case. IPSets cannot hold IPv6 addresses, so complain on these
     * conditions */
    if (distinct_num_fields && ipv6_policy >= SK_IPV6POLICY_MIX) {
        if (app_flags.presorted_input) {
            skAppPrintErr(("Distinct IPs are not yet supported for"
                           " presorted input and IPv6.\n"
                           "\tSpecify --ipv6-policy=asv4 to ignore IPv6"
                           " addresses in the input"));
            appExit(EXIT_FAILURE);
        }
        /* else we are using the hashlib */
        if (distinct_num_fields > 1) {
            skAppPrintErr(("Only a single distinct IP field is supported for"
                           " IPv6 and unsorted\n"
                           "\tinput.  Specify either a single distinct"
                           " columns or --ipv6-policy=asv4\n"
                           "\tto ignore IPv6 addresses"));
            appExit(EXIT_FAILURE);
        }
    }
#endif  /* SK_ENABLE_IPV6 */

    /* If epoch time requested, reduce width of the textual columns
     * for the VALUE_STIME and VALUE_ETIME fields. */
    if (time_flags & SKTIMESTAMP_EPOCH) {
        for (i = 0; i < value_num_fields; ++i) {
            if ((value_fields[i].vf_id == VALUE_STIME)
                || (value_fields[i].vf_id == VALUE_ETIME))
            {
                value_fields[i].vf_text_len = 10;
            }
        }
    }

    /* Use STDIN as an input stream if it is not a TTY; make certain
     * we have some input and we are either reading from STDIN or
     * using files listed the command line, but not both. */
    if (iochecksAcceptFromStdin(ioISP)
        || iochecksInputs(ioISP, 0))
    {
        skAppUsage();
    }

    /* final check. See if stdout is being used for both --copy-input
     * and as the destination for the uniq data.  */
    if (ioISP->stdoutUsed && output.fp == stdout) {
        skAppPrintErr("stdout used for both --copy-input and ascii output");
        appExit(EXIT_FAILURE);
    }

    /* atexit() reports failure with any non-zero value, which is not
     * necessarily negative, so compare against zero */
    if (atexit(appTeardown) != 0) {
        skAppPrintErr("Unable to register appTeardown() with atexit()");
        appExit(EXIT_FAILURE);
    }

    /* invoke the pager */
    using_pager = skOpenPagerWhenStdoutTty(&(output.fp), &pager);
    if (using_pager < 0) {
        appExit(EXIT_FAILURE);
    }
    if (using_pager) {
        /* the output now goes to the pager, which appTeardown() must
         * close with pclose() */
        output.pathname = pager;
        output.ispipe = 1;
    }

    /* bind the Ascii Stream to the output */
    rwAsciiSetOutputHandle(ascii_str, output.fp);

    /* looks good, open the --copy-input destination */
    if (iochecksOpenCopyDest(ioISP)) {
        appExit(EXIT_FAILURE);
    }

    /* set signal handler to clean up temp files on SIGINT, SIGTERM, etc */
    if (skAppSetSignalHandler(&appHandleSignal)) {
        appExit(EXIT_FAILURE);
    }

    return;                       /* OK */
}


/*
 *  appTeardown()
 *
 *    Release everything the application set up: the input checker,
 *    the plug-ins, the ascii stream, the output stream (unless it is
 *    stdout), the string maps, the key and value field arrays, and
 *    the temporary files.  Finally, unregister the application.
 *
 *    Only the first call does any work; later calls return
 *    immediately.
 */
void appTeardown(void)
{
    static int already_run = 0;

    if (already_run) {
        return;
    }
    already_run = 1;

    iochecksTeardown(ioISP);

    /* let the plug-ins clean up, then shut down the plug-in library */
    skPluginRunCleanup(SKPLUGIN_FN_ANY);
    skPluginTeardown();

    /* the ascii stream goes away before the handle it writes to */
    rwAsciiStreamDestroy(&ascii_str);

    /* close the output unless it is the standard output; a failure
     * to close a file is reported only when no signal was caught */
    if (output.fp != stdout) {
        if (output.ispipe) {
            pclose(output.fp);
        } else if (EOF == fclose(output.fp) && !caught_signal) {
            skAppPrintErr("Error closing output file '%s': %s",
                          output.pathname, strerror(errno));
        }
    }

    /* string maps used to parse --fields and --values */
    if (key_field_map) {
        skStringMapDestroy(key_field_map);
        key_field_map = NULL;
    }
    if (value_field_map) {
        skStringMapDestroy(value_field_map);
        value_field_map = NULL;
    }

    /* free() accepts NULL, so these arrays need no guard: the initial
     * values, the key descriptions, and the value descriptions */
    free(value_initial_vals);
    value_initial_vals = NULL;

    free(key_fields);
    key_fields = NULL;

    free(value_fields);
    value_fields = NULL;

    /* remove any temporary files */
    skTempFileTeardown();

    skAppUnregister();
}


/*
 *  status = appOptionsHandler(cData, opt_index, opt_arg);
 *
 *    Called by skOptionsParse(), this handles a user-specified switch
 *    that the application has registered, typically by setting global
 *    variables.  Returns 1 if the switch processing failed or 0 if it
 *    succeeded.  Returning a non-zero value from the handler causes
 *    skOptionsParse() to return a negative value.
 *
 *    The clientData in 'cData' is ignored; 'opt_index' is the index
 *    number that was specified as the last value for each struct
 *    option in appOptions[]; 'opt_arg' is the user's argument to the
 *    switch for options that have a REQUIRED_ARG or an OPTIONAL_ARG.
 *
 *    The --fields, --values, and --output-path switches may each be
 *    given only once; a repeated use is reported as an error.
 */
static int appOptionsHandler(
    clientData  UNUSED(cData),
    int         opt_index,
    char       *opt_arg)
{
    size_t i;
    int rv;

    switch ((appOptionsEnum)opt_index) {
      case OPT_FIELDS:
        /* only store the text; appSetup() parses it once the string
         * maps have been created */
        if (fields_arg) {
            skAppPrintErr("The --%s switch was specified multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        fields_arg = opt_arg;
        break;

      case OPT_VALUES:
        /* as for --fields, the text is parsed later */
        if (values_arg) {
            skAppPrintErr("The --%s switch was specified multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        values_arg = opt_arg;
        break;

      case OPT_ALL_COUNTS:
        /* switch on every built-in value that is marked as being
         * part of --all-counts */
        for (i = 0; i < num_builtin_values; ++i) {
            if (builtin_values[i].bv_all_counts) {
                builtin_values[i].bv_switched_on = 1;
            }
        }
        break;

      case OPT_BYTES:
      case OPT_PACKETS:
      case OPT_FLOWS:
      case OPT_STIME:
      case OPT_ETIME:
      case OPT_SIP_DISTINCT:
      case OPT_DIP_DISTINCT:
        /* these switches are contiguous and in the same order as
         * builtin_values[], so the offset from OPT_BYTES is the
         * index into that array */
        i = opt_index - OPT_BYTES;
        assert(i == builtin_values[i].bv_f.vf_id);
        builtin_values[i].bv_switched_on = 1;
        if (opt_arg) {
            /* the optional argument is the MIN or MIN-MAX threshold */
            rv = skStringParseRange64(&builtin_values[i].bv_f.vf_min,
                                      &builtin_values[i].bv_f.vf_max,
                                      opt_arg, 0, 0,
                                      SKUTILS_RANGE_SINGLE_OPEN);
            if (rv) {
                goto PARSE_ERROR;
            }
            /* treat a single value as having no max, not as a range
             * of a single value */
            if ((builtin_values[i].bv_f.vf_min==builtin_values[i].bv_f.vf_max)
                && !strchr(opt_arg, '-'))
            {
                builtin_values[i].bv_f.vf_max = UINT64_MAX;
            }
        }
        break;

      case OPT_PLUGIN:
        if (skPluginLoadPluginOrDynlib(opt_arg, DYNLIB_UNIQ, 1) != 0) {
            skAppPrintErr("Unable to load %s as a plugin", opt_arg);
            return 1;
        }
        break;

      case OPT_BIN_TIME:
        if (opt_arg == NULL || opt_arg[0] == '\0') {
            /* no time given; use default */
            time_bin_size = DEFAULT_TIME_BIN;
        } else {
            /* parse user's time */
            rv = skStringParseUint32(&time_bin_size, opt_arg, 1, 0);
            if (rv) {
                goto PARSE_ERROR;
            }
        }
        break;

      case OPT_PRESORTED_INPUT:
        app_flags.presorted_input = 1;
        break;

      case OPT_SORT_OUTPUT:
        app_flags.sort_output = 1;
        break;

      case OPT_EPOCH_TIME:
        /* epoch time takes precedence over the legacy format */
        if (time_flags & SKTIMESTAMP_MMDDYYYY) {
            skAppPrintErr("Both --%s and --%s specified. Using --%s",
                          appOptions[OPT_EPOCH_TIME].name,
                          appOptions[OPT_LEGACY_TIMESTAMPS].name,
                          appOptions[OPT_EPOCH_TIME].name);
        }
        time_flags = (time_flags & ~SKTIMESTAMP_MMDDYYYY) | SKTIMESTAMP_EPOCH;
        break;

      case OPT_INTEGER_IPS:
        app_flags.integer_ips = 1;
        break;

      case OPT_ZERO_PAD_IPS:
        app_flags.zero_pad_ips = 1;
        break;

      case OPT_INTEGER_SENSORS:
        app_flags.integer_sensors = 1;
        break;

      case OPT_INTEGER_TCP_FLAGS:
        app_flags.integer_tcp_flags = 1;
        break;

      case OPT_NO_TITLES:
        app_flags.no_titles = 1;
        break;

      case OPT_NO_COLUMNS:
        app_flags.no_columns = 1;
        break;

      case OPT_NO_FINAL_DELIMITER:
        app_flags.no_final_delimiter = 1;
        break;

      case OPT_COLUMN_SEPARATOR:
        /* only the first character of the argument is used */
        delimiter = opt_arg[0];
        break;

      case OPT_DELIMITED:
        app_flags.no_columns = 1;
        app_flags.no_final_delimiter = 1;
        if (opt_arg) {
            delimiter = opt_arg[0];
        }
        break;

      case OPT_PRINT_FILENAMES:
        app_flags.print_filenames = 1;
        break;

      case OPT_COPY_INPUT:
        if (iochecksAllDestinations(ioISP, opt_arg)) {
            return 1;
        }
        break;

      case OPT_OUTPUT_PATH:
        /* appSetup() sets output.fp to stdout before the switches are
         * parsed.  If it is anything else, an earlier --output-path
         * has already opened a stream; opening a second one would
         * overwrite the handle and leak the first stream. */
        if (output.fp != stdout) {
            skAppPrintErr("The --%s switch was specified multiple times",
                          appOptions[opt_index].name);
            return 1;
        }
        output.pathname = opt_arg;
        if (skOpenFile(output.pathname, 1 /* write */, &output.fp,
                       &output.ispipe))
        {
            return 1;
        }
        break;

      case OPT_PAGER:
        pager = opt_arg;
        break;

      case OPT_LEGACY_TIMESTAMPS:
        /* no argument, an empty argument, or an argument starting
         * with '1' selects the legacy format; an argument starting
         * with '0' leaves the format alone */
        if ((opt_arg == NULL) || (opt_arg[0] == '\0') || (opt_arg[0] == '1')) {
            if (time_flags & SKTIMESTAMP_EPOCH) {
                skAppPrintErr("Both --%s and --%s specified; using --%s",
                              appOptions[OPT_EPOCH_TIME].name,
                              appOptions[OPT_LEGACY_TIMESTAMPS].name,
                              appOptions[OPT_EPOCH_TIME].name);
                /* no change */
            } else {
                time_flags |= SKTIMESTAMP_MMDDYYYY;
            }
        } else if (opt_arg[0] != '0') {
            skAppPrintErr("Invalid %s '%s': Select 0 or 1",
                          appOptions[opt_index].name, opt_arg);
            return 1;
        }
        break;
    }

    return 0;                     /* OK */

  PARSE_ERROR:
    /* 'rv' holds the error code from the skStringParse*() call */
    skAppPrintErr("Invalid %s '%s': %s",
                  appOptions[opt_index].name, opt_arg,
                  skStringParseStrerror(rv));
    return 1;
}


/*
 *  appExit(status)
 *
 *    Exit the application with the given status.  Calls
 *    appTeardown() to release the application's state and then calls
 *    exit() with 'status'; this function does not return.
 */
void appExit(int status)
{
    appTeardown();
    exit(status);
}


/*
 *  appHandleSignal(signal_value)
 *
 *    Note that a signal was caught and leave the program by way of
 *    appExit().  A SIGPIPE causes a quiet exit with EXIT_SUCCESS;
 *    any other signal causes a message to be printed and an exit
 *    with EXIT_FAILURE.
 */
static void appHandleSignal(int sig)
{
    int exit_status = EXIT_SUCCESS;

    /* appTeardown() checks this before complaining about the output */
    caught_signal = 1;

    /* we get SIGPIPE if something downstream, like rwcut, exits
     * early; that is not worth a warning and is not a failure.
     * Every other signal is reported and treated as a failure. */
    if (sig != SIGPIPE) {
        skAppPrintErr("Caught signal..cleaning up and exiting");
        exit_status = EXIT_FAILURE;
    }

    appExit(exit_status);
}


/*
 *  ok = parseValueFields(value_string);
 *
 *    Determine the aggregate value fields to compute and fill in the
 *    global value_fields[] array.
 *
 *    'value_string' is the list of value-field names to parse against
 *    the 'value_field_map' string map; it may be NULL, in which case
 *    no list is parsed.  Any entry of builtin_values[] that is still
 *    marked as switched-on and that does not already appear in the
 *    parsed list is appended to the list.  If the resulting list is
 *    empty, default to counting flows.
 *
 *    For each selected field, compute its offset into the values
 *    byte-array and accumulate the overall length of that byte-array
 *    in 'value_byte_width'.  Distinct-IP counts are recorded in both
 *    value_fields[] and distinct_fields[]; their offsets are placed
 *    after all ordinary values.  Requesting a distinct count of an IP
 *    that is already one of the key_fields[] is an error.
 *
 *    Finally allocate 'value_initial_vals', the byte-array holding
 *    the initial value of every aggregate, and set 'hash_value_bytes'.
 *
 *    This function reads key_fields[], key_num_fields, and
 *    key_byte_width, so presumably the key fields must be parsed
 *    before it is called -- TODO confirm against the caller.
 *
 *    Returns 0 on success, or -1 on failure after printing an error.
 *    On failure value_fields and value_initial_vals are freed and set
 *    to NULL.  If a plug-in fails to supply its initial value, the
 *    function does not return; it calls appExit(EXIT_FAILURE).
 */
static int parseValueFields(const char *value_string)
{
    sk_vector_t *parsed_vec = NULL;
    sk_stringmap_entry_t **map_entry;
    sk_stringmap_entry_t *one_entry;
    uint32_t field_count;

    /* return value; assume failure */
    int rv = -1;

    /* error message generated when parsing fields; also reused below
     * to hold the name ("sIP"/"dIP") printed for a distinct-count
     * conflict */
    char *errmsg;

    distinct_field_t *d_field;
    value_field_t *v_field;
    size_t len;

    /* the key-field id that conflicts with a distinct-IP count */
    uint32_t bad_key;
    size_t i, j;
    skplugin_err_t err;

    skplugin_field_t *field_handle;

    /* vector to hold the parsed fields */
    parsed_vec = skVectorNew(sizeof(sk_stringmap_entry_t*));
    if (NULL == parsed_vec) {
        skAppPrintErr("Out of memory");
        goto END;
    }

    /* parse the field list if given */
    if (value_string) {
        if (skStringMapParse(value_field_map, value_string,
                             SKSTRINGMAP_DUPES_ERROR, parsed_vec, &errmsg))
        {
            skAppPrintErr("Invalid %s: %s",
                          appOptions[OPT_VALUES].name, errmsg);
            goto END;
        }
    }

    /* no need to process --bytes,--packets,etc switches if they also
     * appear in the parsed_vec */
    for (i = 0; (map_entry = skVectorGetValuePointer(parsed_vec, i)); ++i) {
        switch ((*map_entry)->id) {
          case VALUE_BYTES:
          case VALUE_PACKETS:
          case VALUE_FLOWS:
          case VALUE_STIME:
          case VALUE_ETIME:
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            /* the id of a built-in value doubles as its index into
             * builtin_values[] */
            builtin_values[(*map_entry)->id].bv_switched_on = 0;
            break;
          default:
            break;
        }
    }

    /* add active --bytes,--packets,etc switches to the vector */
    /* NOTE(review): the results of skStringMapGetByName() and
     * skVectorAppendValue() are not checked here or in the default
     * case below; 'one_entry' is used as-is. */
    for (i = 0; i < num_builtin_values; ++i) {
        if (builtin_values[i].bv_switched_on) {
            skStringMapGetByName(value_field_map,
                                 builtin_values[i].bv_f.vf_title, &one_entry);
            skVectorAppendValue(parsed_vec, &one_entry);
        }
    }

    field_count = skVectorGetCount(parsed_vec);

    /* if the field_count is empty, count records */
    if (0 == field_count) {
        skStringMapGetByName(value_field_map,
                             builtin_values[VALUE_FLOWS].bv_f.vf_title,
                             &one_entry);
        skVectorAppendValue(parsed_vec, &one_entry);
        field_count = skVectorGetCount(parsed_vec);
    }

    /* set the number of value-fields and the value-byte-width to zero.
     * create the array to hold the information about each
     * value-field. */
    value_byte_width = 0;
    value_num_fields = 0;
    value_fields = calloc(field_count, sizeof(value_field_t));
    if (value_fields == NULL) {
        skAppPrintErr("Out of memory");
        goto END;
    }

    /* Loop over the selected values */
    for (i = 0; i < field_count; ++i) {
        v_field = &value_fields[value_num_fields];

        map_entry = skVectorGetValuePointer(parsed_vec, i);
        switch ((*map_entry)->id) {
          case VALUE_BYTES:
          case VALUE_PACKETS:
          case VALUE_FLOWS:
          case VALUE_STIME:
          case VALUE_ETIME:
            /* ordinary built-in aggregate: copy its template from
             * builtin_values[] and place it at the current end of the
             * values byte-array */
            assert(builtin_values[(*map_entry)->id].bv_f.vf_id
                   == (*map_entry)->id);
            memcpy(v_field, &builtin_values[(*map_entry)->id].bv_f,
                   sizeof(value_field_t));
            v_field->vf_offset = value_byte_width;
            value_byte_width += v_field->vf_byte_width;
            ++value_num_fields;
            break;

          case DISTINCT_SIP:
          case DISTINCT_DIP:
            /* Counting distinct IPs when that IP is part of the key
             * doesn't make sense (the count is always 1). */
            if ((*map_entry)->id == DISTINCT_SIP) {
                bad_key = RWREC_FIELD_SIP;
                errmsg = "sIP";
            } else {
                bad_key = RWREC_FIELD_DIP;
                errmsg = "dIP";
            }
            for (j = 0; j < key_num_fields; ++j) {
                if (bad_key == key_fields[j].kf_id) {
                    skAppPrintErr(("Will not count distinct %ss"
                                   " when %s is part of key"),
                                  errmsg, errmsg);
                    goto END;
                }
            }
            /* the distinct fields are part of the key in the hash
             * table, but are printed as values.  add to value_fields,
             * but do not account for their byte-width yet, and only
             * handle offset relative to other distinct fields. */
            assert(builtin_values[(*map_entry)->id].bv_f.vf_id
                   == (*map_entry)->id);
            memcpy(v_field, &builtin_values[(*map_entry)->id].bv_f,
                   sizeof(value_field_t));
            v_field->vf_offset = distinct_bytes_value;
            distinct_bytes_value += v_field->vf_byte_width;

            /* keep track of how the hash key needs to change */
            d_field = &distinct_fields[distinct_num_fields];
            memset(d_field, 0, sizeof(distinct_field_t));

            d_field->df_val_field_pos = value_num_fields;
            d_field->df_id = (*map_entry)->id;
            /* the distinct IP is stored after the regular key bytes
             * and after any earlier distinct fields */
            d_field->df_offset = key_byte_width + distinct_bytes_key;
#if SK_ENABLE_IPV6
            if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                if ((*map_entry)->id == DISTINCT_SIP) {
                    d_field->df_r2k = rwrec_MemGetSIPv6;
                } else {
                    d_field->df_r2k = rwrec_MemGetDIPv6;
                }
                d_field->df_byte_width = RWREC_SIZEOF_SIPv6;
            } else
#endif
            {
                if ((*map_entry)->id == DISTINCT_SIP) {
                    d_field->df_r2k = rwrec_MemGetSIPv4;
                } else {
                    d_field->df_r2k = rwrec_MemGetDIPv4;
                }
                d_field->df_byte_width = RWREC_SIZEOF_SIPv4;
            }
            distinct_bytes_key += d_field->df_byte_width;
            ++distinct_num_fields;
            ++value_num_fields;
            break;

          default:
            /* field comes from a plug-in */
            field_handle = (skplugin_field_t*)(*map_entry)->userdata;
            assert(field_handle);

            /* NOTE(review): failures of the plug-in calls below are
             * only detected by assert(), so they go unnoticed when
             * assertions are compiled out. */
            v_field->vf_id = VALUE_PLUGIN;
            v_field->vf_plugin_field = field_handle;
            err = skPluginFieldTitle(field_handle, &v_field->vf_title);
            assert(err == SKPLUGIN_OK);
            err = skPluginFieldGetLenBin(field_handle, &v_field->vf_byte_width);
            assert(err == SKPLUGIN_OK);
            /* use intermediate variable due to type differences */
            err = skPluginFieldGetLenText(field_handle, &len);
            v_field->vf_text_len = (int)len;
            assert(err == SKPLUGIN_OK);

            v_field->vf_offset = value_byte_width;
            value_byte_width += v_field->vf_byte_width;
            ++value_num_fields;
            break;
        }
    }

    /* adjust offset of distinct fields in the value_fields array:
     * now that the total width of the ordinary values is known,
     * shift the distinct fields so they follow those values */
    for (i = 0, v_field = value_fields; i < value_num_fields; ++i, ++v_field) {
        switch (v_field->vf_id) {
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            v_field->vf_offset += value_byte_width;
            break;
          default:
            break;
        }
    }

    if (0 == value_byte_width) {
        /* if only distinct counts were requested, there will be no
         * value fields.  however, the hash table does not accept a
         * value byte size of 0, so use a single byte.  Also, we must
         * allocate the initial value array  */
        hash_value_bytes = 1;

        value_initial_vals = calloc(hash_value_bytes, sizeof(uint8_t));
        if (NULL == value_initial_vals) {
            skAppPrintErr("Memory error default initial values");
            goto END;
        }
    } else {
        if (value_byte_width > MAX_VALUE_BYTE_WIDTH) {
            skAppPrintErr("Values make too large a key: %u bytes > %u max",
                          (unsigned int)value_byte_width,MAX_VALUE_BYTE_WIDTH);
            goto END;
        }

        hash_value_bytes = value_byte_width;

        /* allocate a byte-array to hold the initial value for each
         * aggregate field; calloc() leaves every other field's
         * initial value as all-zero bytes */
        value_initial_vals = calloc(value_byte_width, sizeof(uint8_t));
        if (NULL == value_initial_vals) {
            skAppPrintErr("Memory error default initial values");
            goto END;
        }

        /* handle any plug-in fields in that byte-array and/or set the
         * sTime field to the maximum */
        for (i=0, v_field=value_fields; i < value_num_fields; ++i, ++v_field) {
            switch (v_field->vf_id) {
              case VALUE_STIME:
                memset(value_initial_vals + v_field->vf_offset, 0xFF,
                       v_field->vf_byte_width);
                break;
              case VALUE_PLUGIN:
                assert(v_field->vf_plugin_field);
                err = skPluginFieldGetInitialValue(
                    v_field->vf_plugin_field,
                    value_initial_vals + v_field->vf_offset);
                if (err != SKPLUGIN_OK) {
                    /* unlike the other errors in this function, this
                     * one exits the program instead of returning */
                    skAppPrintErr("Error initializing plugin value");
                    appExit(EXIT_FAILURE);
                }
                break;
              default:
                break;
            }
        }
    }

    rv = 0;

  END:
    /* do standard clean-up */
    if (parsed_vec != NULL) {
        skVectorDestroy(parsed_vec);
    }
    if (rv != 0) {
        /* something went wrong. do additional clean-up */
        if (value_fields) {
            free(value_fields);
            value_fields = NULL;
        }
        if (value_initial_vals) {
            free(value_initial_vals);
            value_initial_vals = NULL;
        }
    }

    return rv;
}


/*
 *  status = parseKeyFields(field_string);
 *
 *    Parse the string that represents the columns the user wishes to
 *    use when binning the flows, fill in the global key_fields[]
 *    array, and add columns to the rwAsciiStream 'ascii_str'.
 *
 *    Several field names refer to the same bytes of a record (dPort
 *    and ICMP type/code; class and type).  Only the first name of
 *    each such group becomes a key field; the others are added to
 *    the output stream only.  When sTime, elapsed, and eTime are all
 *    requested, the one listed last is likewise added to the output
 *    stream only, since it can be computed from the other two.
 *
 *    When eTime is a key together with another time field, the eTime
 *    entry is moved to the end of key_fields[].
 *
 *    Sets the globals key_fields, key_num_fields, key_byte_width,
 *    and time_fields, and may set adjust_elapsed.
 *
 *    Return 0 on success.  Return 1 on error after printing a
 *    message; on error key_fields is freed and set to NULL.  Any
 *    failure to add a column to the output stream is an error.
 */
static int parseKeyFields(const char *field_string)
{
    key_field_t *keyfield;
    key_field_t *etime_field = NULL;
    sk_vector_t *parsed_vec = NULL;
    sk_stringmap_entry_t **map_entry;
    uint32_t field_count;

    /* return value; assume failure */
    int rv = 1;

    /* error message generated when parsing fields */
    char *errmsg;

    /* keep track of which time field we see last; uses the
     * PARSE_KEY_* macros */
    unsigned int final_time_field = 0;

    /* whether we've added the dPort key.  Both DPORT and
     * ICMP_TYPE_CODE map to the dPort. */
    unsigned int saw_dport = 0;

    /* which of class and type were seen */
    unsigned int saw_class_type = 0;

    uint32_t i;

    /* vector to hold the parsed fields */
    parsed_vec = skVectorNew(sizeof(sk_stringmap_entry_t*));
    if (NULL == parsed_vec) {
        skAppPrintErr("Out of memory");
        goto END;
    }

    /* parse the --fields argument into a vector */
    if (skStringMapParse(key_field_map, field_string, SKSTRINGMAP_DUPES_ERROR,
                         parsed_vec, &errmsg))
    {
        skAppPrintErr("Invalid %s: %s",
                      appOptions[OPT_FIELDS].name, errmsg);
        goto END;
    }

    field_count = skVectorGetCount(parsed_vec);

    /* check for field names that map to the same location.  the
     * saw_dport and saw_class_type flags set in this pass are reset
     * before the second pass uses them. */
    for (i = 0; i < field_count; ++i) {
        map_entry = skVectorGetValuePointer(parsed_vec, i);
        switch ((*map_entry)->id) {
          case RWREC_FIELD_STIME:
          case RWREC_FIELD_STIME_MSEC:
            final_time_field = PARSE_KEY_STIME;
            time_fields |= final_time_field;
            break;
          case RWREC_FIELD_ELAPSED:
          case RWREC_FIELD_ELAPSED_MSEC:
            final_time_field = PARSE_KEY_ELAPSED;
            time_fields |= final_time_field;
            break;
          case RWREC_FIELD_ETIME:
          case RWREC_FIELD_ETIME_MSEC:
            final_time_field = PARSE_KEY_ETIME;
            time_fields |= final_time_field;
            break;
          case RWREC_FIELD_DPORT:
          case RWREC_FIELD_ICMP_TYPE_CODE:
            if (0 == saw_dport) {
                saw_dport = 1;
            }
            break;
          case RWREC_FIELD_FTYPE_CLASS:
          case RWREC_FIELD_FTYPE_TYPE:
            if (0 == saw_class_type) {
                saw_class_type = 1;
            }
            break;
          default:
            break;
        }
    }

    /* Warn if --bin-time requested but time not part of key.  Warn
     * that duration field will be modified when all time fields are
     * given. */
    if (time_bin_size != 0) {
        switch (time_fields) {
          case 0:
          case PARSE_KEY_ELAPSED:
            if (FILEIsATty(stderr)) {
                skAppPrintErr(("Warning: Neither sTime nor eTime appear in"
                               " --%s; --%s switch ignored"),
                              appOptions[OPT_FIELDS].name,
                              appOptions[OPT_BIN_TIME].name);
            }
            break;
          case PARSE_KEY_ALL_TIMES:
            /* must adjust elapsed to be eTime-sTime */
            adjust_elapsed = 1;
            if (FILEIsATty(stderr)) {
                skAppPrintErr("Warning: Modifying duration field "
                              "to be eTime-sTime");
            }
            break;
        }
    }

    /* if all time fields were requested, drop the last one that was
     * seen from 'time_fields'  */
    if (PARSE_KEY_ALL_TIMES == time_fields) {
        time_fields &= ~final_time_field;
    }

    /* set the number of key fields and the key-byte-width to zero.
     * create the array to hold the information about each
     * key-field. */
    key_num_fields = 0;
    key_byte_width = 0;
    key_fields = calloc(field_count, sizeof(key_field_t));
    if (key_fields == NULL) {
        skAppPrintErr("Out of memory");
        goto END;
    }

    /* reset flags */
    saw_dport = 0;
    saw_class_type = 0;

    /* add the key fields to the field-list and to the ascii stream.
     * every failure to extend the ascii stream aborts the parse;
     * otherwise the output columns would not match the request. */
    for (i = 0; i < field_count; ++i) {
        keyfield = &(key_fields[key_num_fields]);

        map_entry = skVectorGetValuePointer(parsed_vec, i);
        if ((*map_entry)->userdata) {
            /* field comes from a plug-in */
            if (appAddPluginKey(keyfield, *map_entry)) {
                skAppPrintErr("Cannot add field %s from plugin",
                              (*map_entry)->name);
                goto END;
            }
            ++key_num_fields;
            continue;
        }
        /* else field is built-in */
        assert((*map_entry)->id < RWREC_PRINTABLE_FIELD_COUNT);

        switch ((rwrec_printable_fields_t)(*map_entry)->id) {
          case RWREC_FIELD_DPORT:
          case RWREC_FIELD_ICMP_TYPE_CODE:
            if (saw_dport) {
                /* the bytes are already in the key; only add the
                 * column to the output */
                if (rwAsciiAppendOneField(ascii_str, (*map_entry)->id)) {
                    skAppPrintErr("Cannot add field %s to output stream",
                                  (*map_entry)->name);
                    goto END;
                }
                continue;
            }
            saw_dport = 1;
            break;

          case RWREC_FIELD_FTYPE_CLASS:
          case RWREC_FIELD_FTYPE_TYPE:
            if (saw_class_type) {
                /* the bytes are already in the key; only add the
                 * column to the output */
                if (rwAsciiAppendOneField(ascii_str, (*map_entry)->id)) {
                    skAppPrintErr("Cannot add field %s to output stream",
                                  (*map_entry)->name);
                    goto END;
                }
                continue;
            }
            saw_class_type = 1;
            break;

          case RWREC_FIELD_STIME:
          case RWREC_FIELD_STIME_MSEC:
            if (0 == (time_fields & PARSE_KEY_STIME)) {
                /* do not add a keyfield for this field */
                if (rwAsciiAppendOneField(ascii_str, (*map_entry)->id)) {
                    skAppPrintErr("Cannot add field %s to output stream",
                                  (*map_entry)->name);
                    goto END;
                }
                continue;
            }
            keyfield->kf_r2k = &recToKeyStime;
            keyfield->kf_k2r = &keyToRecStime;
            break;

          case RWREC_FIELD_ELAPSED:
          case RWREC_FIELD_ELAPSED_MSEC:
            if (0 == (time_fields & PARSE_KEY_ELAPSED)) {
                /* do not add a keyfield for this field */
                if (rwAsciiAppendOneField(ascii_str, (*map_entry)->id)) {
                    skAppPrintErr("Cannot add field %s to output stream",
                                  (*map_entry)->name);
                    goto END;
                }
                continue;
            }
            keyfield->kf_r2k = &recToKeyElapsed;
            keyfield->kf_k2r = &keyToRecElapsed;
            break;

          case RWREC_FIELD_ETIME:
          case RWREC_FIELD_ETIME_MSEC:
            if (0 == (time_fields & PARSE_KEY_ETIME)) {
                /* do not add a keyfield for this field */
                if (rwAsciiAppendOneField(ascii_str, (*map_entry)->id)) {
                    skAppPrintErr("Cannot add field %s to output stream",
                                  (*map_entry)->name);
                    goto END;
                }
                continue;
            }
            /* remember where eTime is so it can be moved last */
            etime_field = keyfield;
            keyfield->kf_r2k = recToKeyEtime;
            keyfield->kf_k2r = keyToRecEtime;
            break;

          default:
            break;
        }

        /* the slot came from calloc() and has not been used yet */
        assert(0 == keyfield->kf_width);

        keyfield->kf_id     = (*map_entry)->id;
        keyfield->kf_offset = key_byte_width;
        keyfield->kf_width  = getFieldBinWidth(keyfield->kf_id);
        if (rwAsciiAppendOneField(ascii_str, keyfield->kf_id)) {
            skAppPrintErr("Cannot add field %s to output stream",
                          (*map_entry)->name);
            goto END;
        }
        /* adjust byte width */
        key_byte_width += keyfield->kf_width;
        ++key_num_fields;
    }

    /* check size of key */
    if (key_byte_width > MAX_KEY_BYTE_WIDTH) {
        skAppPrintErr("Fields make too large a key: %u bytes > %u max",
                      (unsigned int)key_byte_width, MAX_KEY_BYTE_WIDTH);
        goto END;
    }

    /* warn when using --presorted-input and multiple time fields are
     * present or when the time field is not the final field */
    if (app_flags.presorted_input && FILEIsATty(stderr)) {
        switch (time_fields) {
          case 0:
            break;
          case PARSE_KEY_ELAPSED:
          case PARSE_KEY_STIME:
          case PARSE_KEY_ETIME:
            /* one field is present.  see if it is last */
            switch (key_fields[key_num_fields-1].kf_id) {
              case RWREC_FIELD_STIME:
              case RWREC_FIELD_STIME_MSEC:
              case RWREC_FIELD_ELAPSED:
              case RWREC_FIELD_ELAPSED_MSEC:
              case RWREC_FIELD_ETIME:
              case RWREC_FIELD_ETIME_MSEC:
                /* one field is present and it is last */
                break;
              default:
                /* one field is present but it is not last */
                skAppPrintErr(("Warning: Put '%s' last in --%s"
                               " when using --%s"),
                              ((PARSE_KEY_ELAPSED == time_fields)
                               ? "elapsed"
                               : ((PARSE_KEY_STIME == time_fields)
                                  ? "sTime"
                                  : "eTime")),
                              appOptions[OPT_FIELDS].name,
                              appOptions[OPT_PRESORTED_INPUT].name);
                break;
            }
            break;
          default:
            /* multiple time fields present */
            skAppPrintErr(("Warning: Using multiple time-related key"
                           " fields with\n\t--%s may lead to unexpected"
                           " results"),
                          appOptions[OPT_PRESORTED_INPUT].name);
            break;
        }
    }

    /* ensure eTime is the final field. when restoring values, eTime
     * relies on either sTime or elapsed being set.  each entry keeps
     * its own kf_offset, so swapping entries does not change where
     * the bytes live in the key. */
    if ((time_fields & PARSE_KEY_ETIME)
        && (time_fields != PARSE_KEY_ETIME)
        && (etime_field != &key_fields[key_num_fields - 1]))
    {
        key_field_t tmp;
        memcpy(&tmp, etime_field, sizeof(key_field_t));
        memcpy(etime_field, &key_fields[key_num_fields-1],sizeof(key_field_t));
        memcpy(&key_fields[key_num_fields-1], &tmp, sizeof(key_field_t));
    }

    /* successful */
    rv = 0;

  END:
    if (rv != 0) {
        /* something went wrong.  clean up */
        if (key_fields) {
            free(key_fields);
            key_fields = NULL;
        }
    }
    /* do standard clean-up */
    if (parsed_vec != NULL) {
        skVectorDestroy(parsed_vec);
    }

    return rv;
}


/*
 *  appPluginGetTitle(buf, buf_size, keyfield);
 *
 *    Invoke the appropriate function on the plugin's field-handle,
 *    referenced by 'keyfield', to get the title of the field, and put
 *    that value into the buffer 'buf'.
 */
static void appPluginGetTitle(
    char        *text_buf,
    size_t       text_buf_size,
    void        *cb_data)
{
    key_field_t *keyfield = (key_field_t*)cb_data;
    skplugin_err_t err;
    const char *title;

    err = skPluginFieldTitle(keyfield->kf_field_handle, &title);
    assert(SKPLUGIN_OK == err);
    strncpy(text_buf, title, text_buf_size);
    text_buf[text_buf_size-1] = '\0';
}


/*
 *  status = appPluginGetValue(rwrec, text_buf, text_buf_size, cb_data);
 *
 *    Value callback for a key field that comes from a plug-in.
 *    'cb_data' is the key_field_t for the field.  Copy the textual
 *    value that is already stored in the key field's 'kf_textbuf'
 *    into 'text_buf', which holds 'text_buf_size' bytes.  The result
 *    is always NUL-terminated and is truncated when the value does
 *    not fit.  The 'rwrec' parameter is not used.
 *
 *    Always returns 0.
 */
static int appPluginGetValue(
    const rwRec UNUSED(*rwrec),
    char               *text_buf,
    size_t              text_buf_size,
    void               *cb_data)
{
    key_field_t *keyfield = (key_field_t*)cb_data;

    /* The text buffer exists on the keyfield and it has already been
     * filled, so just copy that value into 'text_buf'.  Yes, this is
     * kind of pointless. */
    if (text_buf_size > 0) {
        strncpy(text_buf, keyfield->kf_textbuf, text_buf_size);
        /* strncpy() leaves the destination unterminated when the
         * source fills it, so terminate explicitly */
        text_buf[text_buf_size - 1] = '\0';
    }
    return 0;
}

/*
 *  status = appAddPluginKey(keyfield, map_entry);
 *
 *    'map_entry' is an entry from the key_field_map whose 'userdata'
 *    is the handle of a key field supplied by a plug-in.  Activate
 *    and initialize that plug-in field, ask it for its textual and
 *    binary widths, and record the result in 'keyfield'.  The field
 *    is placed at the current end of the key: the global
 *    'key_byte_width' becomes its offset and is then increased by
 *    the field's binary width.  Finally, add a callback column for
 *    the field to the rwAsciiStream.
 *
 *    Return -1 if any call into the plug-in fails; otherwise return
 *    the result of rwAsciiAppendCallbackField().
 */
static int appAddPluginKey(
    key_field_t            *keyfield,
    sk_stringmap_entry_t   *map_entry)
{
    skplugin_field_t *handle = (skplugin_field_t*)map_entry->userdata;
    size_t            column_width;
    size_t            binary_width;

    /* Activate the field (so cleanup knows about it), and then
     * initialize it */
    if (skPluginFieldActivate(handle) != SKPLUGIN_OK
        || skPluginFieldRunInitialize(handle) != SKPLUGIN_OK)
    {
        return -1;
    }

    /* width of the output column; a width of zero or one that is too
     * large is replaced by the largest width that is allowed */
    if (skPluginFieldGetLenText(handle, &column_width) != SKPLUGIN_OK) {
        return -1;
    }
    if (0 == column_width || column_width >= VALUE_OUT_BUF_SIZE) {
        column_width = VALUE_OUT_BUF_SIZE - 1;
    }

    /* number of bytes the field occupies in the binary key */
    if (skPluginFieldGetLenBin(handle, &binary_width) != SKPLUGIN_OK) {
        return -1;
    }

    /* describe the field; a plug-in field has no built-in
     * record-to-key or key-to-record function */
    keyfield->kf_field_handle = handle;
    keyfield->kf_id           = map_entry->id;
    keyfield->kf_r2k          = NULL;
    keyfield->kf_k2r          = NULL;
    keyfield->kf_width        = binary_width;
    keyfield->kf_offset       = key_byte_width;

    /* the key grows by the width as stored on the keyfield */
    key_byte_width += keyfield->kf_width;

    return rwAsciiAppendCallbackField(ascii_str, &appPluginGetTitle,
                                      &appPluginGetValue, keyfield,
                                      column_width);
}


/*
 *  bin_width = getFieldBinWidth(field_id);
 *
 *    Return the number of bytes of the binary key that are used by
 *    the built-in field 'field_id'.  For the IP address fields the
 *    width depends on the global 'ipv6_policy' when IPv6 support is
 *    compiled in.  An unrecognized 'field_id' is handed to
 *    skAbortBadCase().
 */
static size_t getFieldBinWidth(rwrec_printable_fields_t field_id)
{
#if SK_ENABLE_IPV6
    /* non-zero when addresses are stored at their IPv6 width */
    const int ip_as_v6 = (ipv6_policy >= SK_IPV6POLICY_MIX);
#endif

    switch (field_id) {
      /* IP addresses */
      case RWREC_FIELD_SIP:
#if SK_ENABLE_IPV6
        if (ip_as_v6) {
            return RWREC_SIZEOF_SIPv6;
        }
#endif
        return RWREC_SIZEOF_SIPv4;

      case RWREC_FIELD_DIP:
#if SK_ENABLE_IPV6
        if (ip_as_v6) {
            return RWREC_SIZEOF_DIPv6;
        }
#endif
        return RWREC_SIZEOF_DIPv4;

      case RWREC_FIELD_NHIP:
#if SK_ENABLE_IPV6
        if (ip_as_v6) {
            return RWREC_SIZEOF_NHIPv6;
        }
#endif
        return RWREC_SIZEOF_NHIPv4;

      /* ports and protocol; the ICMP type and code share the bytes
       * of the destination port */
      case RWREC_FIELD_SPORT:
        return RWREC_SIZEOF_SPORT;

      case RWREC_FIELD_DPORT:
      case RWREC_FIELD_ICMP_TYPE_CODE:
        return RWREC_SIZEOF_DPORT;

      case RWREC_FIELD_PROTO:
        return RWREC_SIZEOF_PROTO;

      /* volumes and flags */
      case RWREC_FIELD_PKTS:
        return RWREC_SIZEOF_PKTS;

      case RWREC_FIELD_BYTES:
        return RWREC_SIZEOF_BYTES;

      case RWREC_FIELD_FLAGS:
        return RWREC_SIZEOF_FLAGS;

      /* times */
      case RWREC_FIELD_STIME:
      case RWREC_FIELD_STIME_MSEC:
      case RWREC_FIELD_ETIME:
      case RWREC_FIELD_ETIME_MSEC:
        /* the sktime_t is converted to seconds, which need 4 bytes */
        return 4;

      case RWREC_FIELD_ELAPSED:
      case RWREC_FIELD_ELAPSED_MSEC:
        return RWREC_SIZEOF_ELAPSED;

      /* everything else */
      case RWREC_FIELD_SID:
        return RWREC_SIZEOF_SID;

      case RWREC_FIELD_INPUT:
        return RWREC_SIZEOF_INPUT;

      case RWREC_FIELD_OUTPUT:
        return RWREC_SIZEOF_OUTPUT;

      case RWREC_FIELD_INIT_FLAGS:
        return RWREC_SIZEOF_INIT_FLAGS;

      case RWREC_FIELD_REST_FLAGS:
        return RWREC_SIZEOF_REST_FLAGS;

      case RWREC_FIELD_TCP_STATE:
        return RWREC_SIZEOF_TCP_STATE;

      case RWREC_FIELD_APPLICATION:
        return RWREC_SIZEOF_APPLICATION;

      case RWREC_FIELD_FTYPE_CLASS:
      case RWREC_FIELD_FTYPE_TYPE:
        return RWREC_SIZEOF_FLOW_TYPE;
    }

    /* no case matched */
    skAbortBadCase(field_id);
    return 0;                   /* NOTREACHED */
}


/*
 *  recToKeyStime(rwrec, bin_value);
 *
 *    Write the start-time of 'rwrec', in whole seconds, into
 *    'bin_value' as a 4-byte value in network byte order.  When the
 *    global 'time_bin_size' is larger than 1, the start-time is
 *    first rounded down to a multiple of 'time_bin_size'.
 */
static void recToKeyStime(
    const rwRec    *rwrec,
    void           *bin_value)
{
    uint32_t start_secs = rwRecGetStartSeconds(rwrec);

    if (time_bin_size > 1) {
        /* move the time back to the start of its bin */
        start_secs -= (start_secs % time_bin_size);
    }

    start_secs = htonl(start_secs);
    memcpy(bin_value, &start_secs, sizeof(start_secs));
}


/*
 *  keyToRecStime(rwrec, bin_value);
 *
 *    Read the 4-byte, network byte order count of seconds stored at
 *    'bin_value' and set the start-time of 'rwrec' to that many
 *    seconds with a milliseconds part of zero.
 */
static void keyToRecStime(
    rwRec          *rwrec,
    const void     *bin_value)
{
    uint32_t secs;

    memcpy(&secs, bin_value, sizeof(secs));
    secs = ntohl(secs);

    rwRecSetStartTime(rwrec, sktimeCreate(secs, 0));
}


/*
 *  recToKeyElapsed(rwrec, bin_value);
 *
 *    Write the duration of 'rwrec', in whole seconds, into
 *    'bin_value' as a 4-byte value in network byte order.
 *
 *    When the global 'adjust_elapsed' is set, the duration written
 *    is instead the distance between the start-time and the end-time
 *    after each has been rounded down to a multiple of the global
 *    'time_bin_size'.
 */
static void recToKeyElapsed(
    const rwRec    *rwrec,
    void           *bin_value)
{
    uint32_t elapsed = rwRecGetElapsedSeconds(rwrec);

    if (adjust_elapsed) {
        const uint32_t sTime = rwRecGetStartSeconds(rwrec);

        /*
         * the value wanted is
         * (eTime - (eTime % bin_size)) - (sTime - (sTime % bin_size))
         * where eTime is sTime + elapsed
         */
        elapsed = (elapsed + (sTime % time_bin_size)
                   - ((sTime + elapsed) % time_bin_size));
    }

    elapsed = htonl(elapsed);
    memcpy(bin_value, &elapsed, sizeof(elapsed));
}


/*
 *  keyToRecElapsed(rwrec, bin_value);
 *
 *    Read the 4-byte, network byte order count of seconds stored at
 *    'bin_value', multiply it by 1000, and store the result as the
 *    elapsed field of 'rwrec'.
 */
static void keyToRecElapsed(
    rwRec          *rwrec,
    const void     *bin_value)
{
    uint32_t secs;

    memcpy(&secs, bin_value, sizeof(secs));
    secs = ntohl(secs);

    rwRecSetElapsed(rwrec, 1000 * secs);
}


/*
 *  recToKeyEtime(rwrec, bin_value);
 *
 *    Write the end-time of 'rwrec', in whole seconds, into
 *    'bin_value' as a 4-byte value in network byte order.  When the
 *    global 'time_bin_size' is not zero, the end-time is first
 *    rounded down to a multiple of 'time_bin_size'.
 */
static void recToKeyEtime(
    const rwRec    *rwrec,
    void           *bin_value)
{
    uint32_t end_secs = rwRecGetEndSeconds(rwrec);

    if (time_bin_size != 0) {
        /* move the time back to the start of its bin */
        end_secs -= (end_secs % time_bin_size);
    }

    end_secs = htonl(end_secs);
    memcpy(bin_value, &end_secs, sizeof(end_secs));
}


/*
 *  keyToRecEtime(rwrec, bin_value);
 *
 *    Read the end-time, a 4-byte count of seconds in network byte
 *    order, from 'bin_value' and modify 'rwrec' so that its end-time
 *    has that value.  What gets modified depends on which time
 *    fields are part of the key, as given by the global
 *    'time_fields':
 *
 *      eTime only:         the start-time is set to the end-time
 *      eTime and elapsed:  the start-time is set to end-time - elapsed
 *      eTime and sTime:    elapsed is set to end-time - start-time
 *
 *    This function assumes any start-time or elapsed value in the
 *    key has already been copied onto 'rwrec'.  Any other value of
 *    'time_fields' is handed to skAbortBadCase().
 */
static void keyToRecEtime(
    rwRec          *rwrec,
    const void     *bin_value)
{
    uint32_t end_secs;

    memcpy(&end_secs, bin_value, sizeof(end_secs));
    end_secs = ntohl(end_secs);

    if (PARSE_KEY_ETIME == time_fields) {
        /* etime only; just set sTime to eTime--elapsed is already 0 */
        rwRecSetStartTime(rwrec, sktimeCreate(end_secs, 0));

    } else if ((PARSE_KEY_ETIME | PARSE_KEY_ELAPSED) == time_fields) {
        /* etime and elapsed; step back from the end time by the
         * elapsed time to find the start time */
        uint32_t duration = rwRecGetElapsedSeconds(rwrec);

        rwRecSetStartTime(rwrec, sktimeCreate((end_secs - duration), 0));

    } else if ((PARSE_KEY_ETIME | PARSE_KEY_STIME) == time_fields) {
        /* etime and stime; elapsed is the distance between them */
        uint32_t start_secs = rwRecGetStartSeconds(rwrec);

        assert(start_secs <= end_secs);
        rwRecSetElapsed(rwrec, (1000 * (end_secs - start_secs)));

    } else {
        skAbortBadCase(time_fields);
    }
}


/*
 *  int = appNextInput(&rwios);
 *
 *    Fill 'rwios' with the next input file to read.  Return 0 if
 *    'rwios' was successfully opened or 1 if there are no more input
 *    files.  Once 1 has been returned, every later call also returns
 *    1 without touching 'rwios'.
 *
 *    When an input file cannot be opened, the return value is
 *    dependent on the error.  If the error is due to being out of
 *    file handles or memory (EMFILE or ENOMEM), return -2; the next
 *    call tries the same file again.  Otherwise print the error and
 *    return -1; the next call moves on to the following file.  In
 *    both cases the stream in 'rwios' is destroyed before returning.
 *
 *    The position in the list of input files is kept in a static
 *    variable.
 */
int appNextInput(skstream_t **rwios)
{
    /* index into ioISP->fnArray[] of the most recently attempted
     * file; negative until the first call */
    static int counter = -1;
    int rv;

    if (counter < 0) {
        counter = ioISP->firstFile;
    } else if (counter + 1 >= ioISP->fileCount) {
        /* no more input.  leave 'counter' alone so that it never
         * moves beyond the end of the array */
        return 1;
    } else {
        ++counter;
    }

    /* create rwios and open file */
    errno = 0;
    rv = skStreamOpenSilkFlow(rwios, ioISP->fnArray[counter], SK_IO_READ);
    if (rv) {
        if (errno == EMFILE || errno == ENOMEM) {
            /* report the file while 'counter' still refers to it */
            TRACEMSG(("Unable to open '%s': %s",
                      ioISP->fnArray[counter], strerror(errno)));
            /* decrement counter so we try to open this file again */
            --counter;
            rv = -2;
        } else {
            if (app_flags.print_filenames) {
                fprintf(PRINT_FILENAMES_FH, "%s\n", ioISP->fnArray[counter]);
            }
            skStreamPrintLastErr(*rwios, rv, &skAppPrintErr);
            rv = -1;
        }
        skStreamDestroy(rwios);
        return rv;
    }

    if (app_flags.print_filenames) {
        fprintf(PRINT_FILENAMES_FH, "%s\n", skStreamGetPathname(*rwios));
    }
    skStreamSetCopyInput(*rwios, ioISP->inputCopyFD);
    skStreamSetIPv6Policy(*rwios, ipv6_policy);

    return 0;
}


/*
 *  ok = createStringmaps();
 *
 *    Build the two string-maps used when parsing the --fields and
 *    --values switches: the global 'key_field_map' and the global
 *    'value_field_map'.  Each map is given the built-in names first
 *    and then the names supplied by any plug-in fields.  All names
 *    belonging to one plug-in field share a single id, and the
 *    plug-in's field handle is stored as the entry's userdata.
 *
 *    Return 0 on success.  Print an error and return -1 if a map
 *    cannot be created or an entry cannot be added.
 */
static int createStringmaps(void)
{
    skplugin_field_iter_t  pi_iter;
    skplugin_field_t      *pi_field;
    skplugin_err_t         pi_err;
    sk_stringmap_entry_t   sm_entry;
    sk_stringmap_status_t  sm_err;
    const char           **title_list;
    const char           **title;
    const char            *pi_path;
    uint32_t               key_id;
    uint32_t               value_id;
    size_t                 bv;

    /*
     * Key fields.  Start with the default set of printable fields,
     * then drop the millisecond variants of the time fields, since a
     * millisecond value makes little sense as a key.
     *
     * Removing the MSEC names here only hides them from the user; the
     * rest of the code still handles the MSEC fields by mapping them
     * onto the non-MSEC versions.
     */
    if (0 != rwAsciiFieldMapAddDefaultFields(&key_field_map)) {
        skAppPrintErr("Unable to setup fields stringmap");
        return -1;
    }
    (void)skStringMapRemoveByID(key_field_map, RWREC_FIELD_STIME_MSEC);
    (void)skStringMapRemoveByID(key_field_map, RWREC_FIELD_ETIME_MSEC);
    (void)skStringMapRemoveByID(key_field_map, RWREC_FIELD_ELAPSED_MSEC);

    /* ids for plug-in key fields begin just after the built-in ids */
    key_id = RWREC_PRINTABLE_FIELD_COUNT;

    /* visit each key field the plug-ins provide */
    pi_err = skPluginFieldIteratorBind(&pi_iter, SKPLUGIN_APP_UNIQ_FIELD, 1);
    assert(pi_err == SKPLUGIN_OK);
    while (skPluginFieldIteratorNext(&pi_iter, &pi_field)) {
        pi_err = skPluginFieldName(pi_field, &title_list);
        assert(pi_err == SKPLUGIN_OK);

        /* the list of names ends with a NULL entry; every name for
         * this field is added under the same id */
        title = title_list;
        while (*title) {
            memset(&sm_entry, 0, sizeof(sm_entry));
            sm_entry.name = *title;
            sm_entry.id = key_id;
            sm_entry.userdata = pi_field;
            sm_err = skStringMapAddEntries(key_field_map, 1, &sm_entry);
            if (sm_err != SKSTRINGMAP_OK) {
                skPluginFieldGetPluginName(pi_field, &pi_path);
                skAppPrintErr(("Plug-in cannot add field named '%s': %s."
                               " Plug-in file: %s"),
                              *title, skStringMapStrerror(sm_err), pi_path);
                return -1;
            }
            ++title;
        }

        /* the next plug-in field gets the next id */
        ++key_id;
    }

    /*
     * Value fields.  These go into their own map, whose ids are
     * independent of the key ids used above.
     */
    if (skStringMapCreate(&value_field_map)) {
        skAppPrintErr("Unable to create map for values");
        return -1;
    }

    /* add the built-in values, remembering the largest id used */
    value_id = 0;
    for (bv = 0; bv < num_builtin_values; ++bv) {
        memset(&sm_entry, 0, sizeof(sk_stringmap_entry_t));
        sm_entry.name = builtin_values[bv].bv_f.vf_title;
        sm_entry.id = builtin_values[bv].bv_f.vf_id;
        sm_err = skStringMapAddEntries(value_field_map, 1, &sm_entry);
        if (sm_err) {
            skAppPrintErr("Unable to add value field named '%s': %s",
                          sm_entry.name, skStringMapStrerror(sm_err));
            return -1;
        }
        if (value_id < sm_entry.id) {
            value_id = sm_entry.id;
        }
    }

    /* visit each value field the plug-ins provide */
    pi_err = skPluginFieldIteratorBind(&pi_iter, SKPLUGIN_APP_UNIQ_VALUE, 1);
    assert(pi_err == SKPLUGIN_OK);
    while (skPluginFieldIteratorNext(&pi_iter, &pi_field)) {
        pi_err = skPluginFieldName(pi_field, &title_list);
        assert(pi_err == SKPLUGIN_OK);

        /* move to the first id beyond those already handed out */
        ++value_id;

        title = title_list;
        while (*title) {
            memset(&sm_entry, 0, sizeof(sm_entry));
            sm_entry.name = *title;
            sm_entry.id = value_id;
            sm_entry.userdata = pi_field;
            sm_err = skStringMapAddEntries(value_field_map, 1, &sm_entry);
            if (sm_err != SKSTRINGMAP_OK) {
                skPluginFieldGetPluginName(pi_field, &pi_path);
                skAppPrintErr(("Plug-in cannot add value named '%s': %s."
                               " Plug-in file: %s"),
                              *title, skStringMapStrerror(sm_err), pi_path);
                return -1;
            }
            ++title;
        }
    }

    return 0;
}


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
