/*
** Copyright (C) 2001-2012 by Carnegie Mellon University.
**
** @OPENSOURCE_HEADER_START@
**
** Use of the SILK system and related source code is subject to the terms
** of the following licenses:
**
** GNU Public License (GPL) Rights pursuant to Version 2, June 1991
** Government Purpose License Rights (GPLR) pursuant to DFARS 252.227.7013
**
** NO WARRANTY
**
** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER
** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY
** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN
** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY
** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT
** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE,
** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE
** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT,
** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY
** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF
** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES.
** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF
** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON
** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE
** DELIVERABLES UNDER THIS LICENSE.
**
** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie
** Mellon University, its trustees, officers, employees, and agents from
** all claims or demands made against them (and any related losses,
** expenses, or attorney's fees) arising out of, or relating to Licensee's
** and/or its sub licensees' negligent use or willful misuse of or
** negligent conduct or willful misconduct regarding the Software,
** facilities, or other rights or assistance granted by Carnegie Mellon
** University under this License, including, but not limited to, any
** claims of product liability, personal injury, death, damage to
** property, or violation of any laws or regulations.
**
** Carnegie Mellon University Software Engineering Institute authored
** documents are sponsored by the U.S. Department of Defense under
** Contract FA8721-05-C-0003. Carnegie Mellon University retains
** copyrights in all material produced under this contract. The U.S.
** Government retains a non-exclusive, royalty-free license to publish or
** reproduce these documents, or allow others to do so, for U.S.
** Government purposes only pursuant to the copyright license under the
** contract clause at 252.227.7013.
**
** @OPENSOURCE_HEADER_END@
*/

/*
**  rwstats.c
**
**  Implementation of the rwstats suite application.
**
**  Reads packed files or reads the output from rwfilter and can
**  compute a battery of characterizations and statistics:
**
**  -- Top N or Bottom N SIPs with counts; count of unique SIPs
**  -- Top N or Bottom N DIPs with counts; count of unique DIPs
**  -- Top N or Bottom N SIP/DIP pairs with counts; count of unique
**     SIP/DIP pairs (for a limited number of records)
**  -- Top N or Bottom N Src Ports with counts; count of unique Src Ports
**  -- Top N or Bottom N Dest Ports with counts; count of unique Dest Ports
**  -- Top N or Bottom N Protocols with counts; count of unique protocols
**  -- For more continuous variables (bytes, packets, bytes/packet)
**     provide statistics such as min, max, quartiles, and intervals
**
**  Instead of specifying a Top N or Bottom N as an absolute number N,
**  the user may specify a cutoff threshold.  In this case, the Top N
**  or Bottom N required to print all counts meeting the threshold is
**  computed by the application.
**
**  Instead of specifying the threshold as an absolute count, the user
**  may specify the threshold as percentage of all input records.  For
**  this case, the absolute threshold is calculated and then that is
**  used to calculate the Top N or Bottom N.
**
**  The application will only do calculations and produce output when
**  asked to do so.  At least one argument is required to tell the
**  application what to do.
**
**  Ideas for expansion
**  -- Similarly for other variables, e.g., country code.
**  -- Output each type of data to its own file
**  -- Save intermediate data in files for faster reprocessing by this
**     application
**  -- Save intermediate data in files for processing by other
**     applications
**
*/

/*
**  IMPLEMENTATION NOTES
**
**  For each input type (source ip, dest ip, source port, proto, etc),
**  there are two globals: limit_<type> contains the value the user
**  entered for the input type, and wanted_stat_<type> is a member
**  of the wanted_stat_type and says what the limit_<type> value
**  represents---e.g., the Top N, the bottom threshold percentage, etc.
**
**  The application takes input (either from stdin or as files on
**  command line) and calls processFile() on each.  A count of each
**  unique source IP addresses is stored in the IpCounter hash table
**  counter_src_ip; Destinations IPs in counter_dest_ip; data for
**  flow between a Source IP and Destination IP pair are stored in
**  counter_pair_ip.
**
**  Since there are relatively few ports and protocols, two
**  65536-elements arrays, src_port_array and dest_port_array are
**  used to store a count of the records for each source and
**  destination port, respectively, and a 256-element array,
**  proto_array, is used to store a count of each protocol.
**
**  Minima, maxima, quartile, and interval data are stored for each of
**  bytes, packets, and bytes-per-packet for all flows--regardless of
**  protocol--and detailed for a limited number (RWSTATS_NUM_PROTO-1)
**  of protocols.  The minima and maxima are each stored in arrays
**  for each of bytes, packets, bpp.  For example bytes_min[0]
**  stores the smallest byte count regardless of protocol (ie, over
**  all protocols), and pkts_max[1] stores the largest packet count
**  for the first protocol the user specified.  The mapping from
**  protocol to array index is given by proto_to_stats_idx[], where
**  the index into proto_to_stats_idx[] returns an integer that is
**  the index into bytes_min[].  Data for the intervals is stored in
**  two dimensional arrays, where the first dimension is the same as
**  for the minima and maxima, and the second dimension is the number
**  of intervals, NUM_INTERVALS.
**
**  Once data is collected, it is processed.
**
**  For the IPs, the user is interested in the number of unique IPs and
**  the IPs with the topN counts (things are similar for the bottomN,
**  but we use topN in this discussion to keep things more clear).  In
**  the printTopIps() function, an array with 2*topN elements is
**  created and passed to calcTopIps(); that array will be the result
**  array and it will hold the topN IpAddr and IpCount pairs in sorted
**  order.  In calcTopIps(), a working array of 2*topN elements and a
**  Heap data structure with topN nodes are created.  The topN
**  IpCounts seen are stored as IpCount/IpAddr pairs in the
**  2*topN-element array (but not in sorted order), and the heap
**  stores pointers into that array with the lowest IpCount at the
**  root of the heap.  As the function iterates over the hash table,
**  it compares the IpCount of the current hash-table element with the
**  IpCount at the root of the heap.  When the IpCount of the
**  hash-table element is larger, the root of the heap is removed, the
**  IpCount/IpAddr pair pointed to by the former heap-root is removed
**  from the 2*topN-element array and replaced with the new
**  IpCount/IpAddr pair, and finally a new node is added to the heap
**  that points to the new IpCount/IpAddr pair.  This continues until
**  all hash-table entries are processed.  To get the list of topN IPs
**  from highest to lowest, calcTopIps() removes elements from the
**  heap and stores them in the result array from position N-1 to
**  position 0.
**
**  Finding the topN source ports, topN destination ports, and topN
**  protocols are similar to finding the topN IPs, except the ports
**  and protocols are already stored in an array, so pointers directly
**  into the src_port_array, dest_port_array, and proto_array
**  are stored in the heap.  When generating output, the number of the
**  port or protocol is determined by the difference between the pointer
**  into the *_port_array or proto_array and its start.
**
**  Instead of specifying a topN, the user may specify a cutoff
**  threshold.  In this case, the topN required to print all counts
**  meeting the threshold is computed by looping over the IP
**  hash-table or port/protocol arrays and finding all entries with at
**  least threshold hits.
**
**  The user may specify a percentage threshold instead of an absolute
**  threshold.  Once all records are read, the total record count is
**  multiplied by the percentage threshold to get the absolute
**  threshold cutoff, and that is used to calculate the topN as
**  described in the preceding paragraph.
**
**  For the continuous variables bytes, packets, bpp, most of the work
**  was done while reading the data, so processing is minimal.  Only
**  the quartiles must be calculated.
*/

#include <silk/silk.h>

RCSIDENT("$SiLK: rwstats.c 372a8bc31d8a 2012-02-10 21:55:28Z mthomas $");

#include <silk/hashlib.h>
#include <silk/skheap.h>
#include "rwstats.h"


/* TYPEDEFS AND DEFINES */

/* Initializer for hash table creation */
#define HASH_INITIAL_SIZE 500000

/* Maximum number of files to attempt to merge at once---used for
 * setting array sizes */
#define MAX_MERGE_FILES 1024

/* The method used to compute a count of distinct IPs. */
typedef enum {
    /* compute the distinct count by comparing hash keys */
    RWSTATS_DISTINCT_COUNT,
    /* compute the distinct count by comparing the previous IP with
     * this IP.  requires that we cache the previous IP */
    RWSTATS_DISTINCT_CACHE,
    /* compute the distinct count by keeping track of each IP we
     * see. */
    RWSTATS_DISTINCT_IPSET
} distinct_type_t;

/* The state needed to compute one distinct count. */
typedef struct distinct_value_st {
    /* the running count; NOTE(review): presumably the number of
     * distinct IPs seen so far---confirm against the users of this
     * structure */
    uint64_t            dv_count;
    /* which of the distinct_type_t methods is in use */
    distinct_type_t     dv_type;
    /* NOTE(review): presumably identifies the IP field being
     * counted; value_types_t is declared outside this file---confirm */
    value_types_t       dv_ip;
    /* method-specific state.  NOTE(review): judging by the names,
     * 'dv_ipset' goes with RWSTATS_DISTINCT_IPSET and 'dv_cache'
     * (the previous IP) with RWSTATS_DISTINCT_CACHE---confirm */
    union dv_un {
        skIPTree_t     *dv_ipset;
        uint32_t        dv_cache;
    }                   dv_v;
} distinct_value_t;


/* For output, add an "s" when speaking of values other than 1 */
#define PLURAL(plural_val) (((plural_val) == 1) ? "" : "s")

/*
 *  dir_val_type = DIR_AND_TYPE(direction, value_type);
 *
 *    Return a single integer that encodes the direction (RWSTATS_DIR_TOP,
 *    RWSTATS_DIR_BOTTOM) and the value type to compute (VAL_BYTES,
 *    VAL_PACKETS, VAL_FLOWS).
 *
 *    The direction is shifted left by three bits and OR-ed with the
 *    value type, so the encoding is only unambiguous while the value
 *    type fits in the low three bits (that is, it is less than 8).
 */
#define DIR_AND_TYPE(dat_t_or_b, dat_val_type)  \
    (((dat_t_or_b) << 3) | (dat_val_type))


/*
 *  Given a pointer to the start of a heap node, return a pointer to
 *  the node's value or to the node's key.  The value is at the start
 *  of the node and the key begins 'heap_value_bytes' after it.
 *  NOTE(review): the arithmetic is in units of the node pointer's
 *  type, so this presumably expects a uint8_t*---confirm at the call
 *  sites.
 */
#define HEAP_NODE_VAL_PTR(hnvp_node)  (hnvp_node)
#define HEAP_NODE_KEY_PTR(hnkp_node)  ((hnkp_node) + heap_value_bytes)


/* The summation values are stored in an array of bytes; the offsets
 * into this array are stored in the variable value_fields[].  The
 * following macros access the memory at these offsets or copy the
 * values from/to the byte-array and a variable.
 *
 * In each macro, 'vf_ptr' is the start of the byte array and 'vf_idx'
 * is the index into value_fields[]. */

/* Return a uint8_t* to the start of value field 'vf_idx'. */
#define VAL_FIELD_GET_PTR(vf_ptr, vf_idx)                       \
    (((uint8_t*)(vf_ptr)) + value_fields[(vf_idx)].vf_offset)

/* As VAL_FIELD_GET_PTR(), but cast the result to a uint32_t*.  The
 * caller may only dereference it when the field's address is
 * suitably aligned for a uint32_t. */
#define VAL_FIELD_GET_PTR_U32(vf_ptr, vf_idx)           \
    ((uint32_t*)VAL_FIELD_GET_PTR(vf_ptr, vf_idx))

/* As VAL_FIELD_GET_PTR(), but cast the result to a uint64_t*.  The
 * caller may only dereference it when the field's address is
 * suitably aligned for a uint64_t. */
#define VAL_FIELD_GET_PTR_U64(vf_ptr, vf_idx)           \
    ((uint64_t*)VAL_FIELD_GET_PTR(vf_ptr, vf_idx))

/* Copy the field's 'vf_byte_width' bytes out of the byte array and
 * into the memory that the pointer 'out_val' references. */
#define VAL_FIELD_MEMGET(vf_ptr, vf_idx, out_val)               \
    memcpy((out_val), VAL_FIELD_GET_PTR(vf_ptr, vf_idx),        \
           value_fields[(vf_idx)].vf_byte_width)

/* Copy 'vf_byte_width' bytes from the memory that the pointer
 * 'in_val' references into the field in the byte array.  Despite the
 * name, this stores a value with memcpy(); it does not memset(). */
#define VAL_FIELD_MEMSET(vf_ptr, vf_idx, in_val)        \
    memcpy(VAL_FIELD_GET_PTR(vf_ptr, vf_idx), (in_val), \
           value_fields[(vf_idx)].vf_byte_width)

/*
 * Macro to take the key from the hash table 'hashkey' and copy the
 * distinct field at position 'dnum' from it and into 'ip_ptr'
 *
 * The field is located via distinct_fields[dnum].df_offset.  The two
 * variants are not identical:
 *
 * -- When RWSTATS_USE_MEMCPY is true, df_byte_width bytes are copied
 *    with memcpy(), which has no alignment requirement.
 *
 * -- Otherwise the field is read through a uint32_t* cast: exactly
 *    sizeof(uint32_t) bytes are read regardless of df_byte_width, and
 *    the field's address must be suitably aligned for a uint32_t.
 */
#if  RWSTATS_USE_MEMCPY
#define GET_DISTINCT_IP(hashkey, dnum, ip_ptr)                  \
    memcpy((ip_ptr),                                            \
           (hashkey) + distinct_fields[(dnum)].df_offset,       \
           distinct_fields[(dnum)].df_byte_width)
#else
#define GET_DISTINCT_IP(hashkey, dnum, ip_ptr)                  \
    (*(ip_ptr)) = *(uint32_t*)((hashkey) + distinct_fields[(dnum)].df_offset)
#endif

/*
 *  If 'ipaddr' is not in 'ipset', add it and increment
 *  'distinct_count'.  If 'ipaddr' is already in 'ipset', do nothing.
 *
 *  In addition, print a message (one time) if we fail to add an IP to
 *  an IPset, meaning that the distinct counts are incorrect.  Uses the
 *  global variable 'distinct_err'.
 *
 *  CAUTIONS:
 *
 *  -- This expands to a bare if/else-if chain; it is not wrapped in
 *     do { } while (0).  Use it only as a complete statement inside
 *     braces.  Using it as the unbraced body of an 'if' that has its
 *     own 'else' either fails to compile or attaches that 'else' to
 *     this chain.
 *
 *  -- 'mia_ipset' and 'mia_ipaddr' each appear twice in the
 *     expansion, so arguments with side effects may be evaluated
 *     twice.
 */
#define MEMCHECK_IPSET_ADD(mia_ipset, mia_ipaddr, mia_distinct_count)   \
    if (skIPTreeCheckAddress((mia_ipset), (mia_ipaddr))) {              \
        /* no need to do anything */                                    \
    } else if (skIPTreeAddAddress((mia_ipset), (mia_ipaddr)) == 0) {    \
        ++(mia_distinct_count);                                         \
    } else if (!distinct_err) {                                         \
        skAppPrintErr("Out of memory counting distinct IPs."            \
                      " Counts may be incorrect");                      \
        distinct_err = 1;                                               \
    }


/* Return a pointer to the start of the key field at position 'idx'
 * within the byte array 'keyarray'; the field's offset is taken from
 * the global key_fields[]. */
#define KEY_FIELD_PTR(keyarray, idx)            \
    ((keyarray) + key_fields[(idx)].kf_offset)

/*
 *  These macros either get a value from a record, or set a value on
 *  the record.  The value will be a uint8, a uint16, or a uint32, and
 *  it may be in network or native byte order.
 *
 *  REC_TO_KEY_*: call the getter 'func' on 'rec' and store the result
 *  in the key field at position 'idx' of 'keyarray'.
 *
 *  KEY_TO_REC_*: read the key field at position 'idx' of 'keyarray'
 *  and pass it to the setter 'func' for 'rec'.
 *
 *  The NETWORK variants use htons()/htonl() for both directions.
 *
 *  The REC_TO_KEY_* macros always expand to a brace-enclosed block.
 *  The KEY_TO_REC_* macros expand to an expression for the 8-bit
 *  widths and when RWSTATS_USE_MEMCPY is false, but to a
 *  brace-enclosed block for the 16- and 32-bit widths when
 *  RWSTATS_USE_MEMCPY is true.  Callers must therefore use every one
 *  of these macros only as a complete statement.
 */
#define REC_TO_KEY_NATIVE_08(func, rec, keyarray, idx)  \
    { *(KEY_FIELD_PTR(keyarray, idx)) = func(rec); }

#define KEY_TO_REC_NATIVE_08(func, rec, keyarray, idx)  \
    func((rec), *(KEY_FIELD_PTR(keyarray, idx)))

/* a single byte has no byte order; the NETWORK forms are aliases */
#define REC_TO_KEY_NETWORK_08  REC_TO_KEY_NATIVE_08

#define KEY_TO_REC_NETWORK_08  KEY_TO_REC_NATIVE_08

#if  !RWSTATS_USE_MEMCPY

/* These variants access the key through uint16_t* and uint32_t*
 * casts, so each field's address must be suitably aligned for the
 * type used to access it. */

#define REC_TO_KEY_NATIVE_16(func, rec, keyarray, idx)          \
    { *((uint16_t*)KEY_FIELD_PTR(keyarray, idx)) = func(rec); }

#define REC_TO_KEY_NETWORK_16(func, rec, keyarray, idx)                 \
    { *((uint16_t*)KEY_FIELD_PTR(keyarray, idx)) = htons(func(rec)); }

#define REC_TO_KEY_NATIVE_32(func, rec, keyarray, idx)          \
    { *((uint32_t*)KEY_FIELD_PTR(keyarray, idx)) = func(rec); }

#define REC_TO_KEY_NETWORK_32(func, rec, keyarray, idx)                 \
    { *((uint32_t*)KEY_FIELD_PTR(keyarray, idx)) = htonl(func(rec)); }

#define KEY_TO_REC_NATIVE_16(func, rec, keyarray, idx)          \
    func((rec), *((uint16_t*)KEY_FIELD_PTR(keyarray, idx)))

#define KEY_TO_REC_NETWORK_16(func, rec, keyarray, idx)                 \
    func((rec), htons(*((uint16_t*)KEY_FIELD_PTR(keyarray, idx))))

#define KEY_TO_REC_NATIVE_32(func, rec, keyarray, idx)          \
    func((rec), *((uint32_t*)KEY_FIELD_PTR(keyarray, idx)))

#define KEY_TO_REC_NETWORK_32(func, rec, keyarray, idx)                 \
    func((rec), htonl(*((uint32_t*)KEY_FIELD_PTR(keyarray, idx))))

#else  /* the following is used when RWSTATS_USE_MEMCPY is true */

/* These variants move the value through a local temporary with
 * memcpy(), so the key fields have no alignment requirement. */

#define REC_TO_KEY_NATIVE_16(func, rec, keyarray, idx)                  \
    {                                                                   \
        uint16_t t16 = func(rec);                                       \
        memcpy(KEY_FIELD_PTR(keyarray, idx), &t16, sizeof(t16));        \
    }

#define REC_TO_KEY_NETWORK_16(func, rec, keyarray, idx)                 \
    {                                                                   \
        uint16_t t16 = htons(func(rec));                                \
        memcpy(KEY_FIELD_PTR(keyarray, idx), &t16, sizeof(t16));        \
    }

#define REC_TO_KEY_NATIVE_32(func, rec, keyarray, idx)                  \
    {                                                                   \
        uint32_t t32 = func(rec);                                       \
        memcpy(KEY_FIELD_PTR(keyarray, idx), &t32, sizeof(t32));        \
    }

#define REC_TO_KEY_NETWORK_32(func, rec, keyarray, idx)                 \
    {                                                                   \
        uint32_t t32 = htonl(func(rec));                                \
        memcpy(KEY_FIELD_PTR(keyarray, idx), &t32, sizeof(t32));        \
    }

#define KEY_TO_REC_NATIVE_16(func, rec, keyarray, idx)                  \
    {                                                                   \
        uint16_t t16;                                                   \
        memcpy(&t16, KEY_FIELD_PTR(keyarray, idx), sizeof(t16));        \
        func((rec), t16);                                               \
    }

#define KEY_TO_REC_NETWORK_16(func, rec, keyarray, idx)                 \
    {                                                                   \
        uint16_t t16;                                                   \
        memcpy(&t16, KEY_FIELD_PTR(keyarray, idx), sizeof(t16));        \
        func((rec), htons(t16));                                        \
    }

#define KEY_TO_REC_NATIVE_32(func, rec, keyarray, idx)                  \
    {                                                                   \
        uint32_t t32;                                                   \
        memcpy(&t32, KEY_FIELD_PTR(keyarray, idx), sizeof(t32));        \
        func((rec), t32);                                               \
    }

#define KEY_TO_REC_NETWORK_32(func, rec, keyarray, idx)                 \
    {                                                                   \
        uint32_t t32;                                                   \
        memcpy(&t32, KEY_FIELD_PTR(keyarray, idx), sizeof(t32));        \
        func((rec), htonl(t32));                                        \
    }

#endif  /* RWSTATS_USE_MEMCPY */


/* EXPORTED VARIABLES */

/* user limit for this stat: N if top N or bottom N, threshold, or
 * percentage */
rwstats_limit_t limit;

/* whether the Top or the Bottom bins are reported; initialized to
 * Top */
rwstats_direction_t direction = RWSTATS_DIR_TOP;

/* the final delimiter on each line; assume none */
char final_delim[] = {'\0', '\0'};

/* widths of the output columns, indexed by the WIDTH_* names that
 * appear beside each entry */
int width[RWSTATS_COLUMN_WIDTH_COUNT] = {
    15, /* WIDTH_KEY:   key */
    20, /* WIDTH_VAL:   count */
    10, /* WIDTH_INTVL: interval maximum */
    10, /* WIDTH_PCT:   percentage value */
};

/* non-zero when --overall-stats or --detail-proto-stats is given */
int proto_stats = 0;

/* delimiter between output columns */
char delimiter = '|';

/* the columns/fields that make up our key for this run */
key_field_t *key_fields = NULL;

/* the number of actual columns/fields that make up the key */
size_t key_num_fields = 0;

/* the total byte count of the key */
size_t key_byte_width = 0;

/* information about distinct fields that are in use. */
distinct_field_t distinct_fields[NUM_DISTINCTS];

/* Number of distinct fields the user has requested */
size_t distinct_num_fields = 0;

/* Number of value fields the user has requested */
size_t value_num_fields = 0;

/* the columns/fields that make up our values for this run */
value_field_t *value_fields = NULL;

/* Total byte count for all value_fields that are being computed */
size_t value_byte_width = 0;

/* the initial values as a byte-encoded array */
uint8_t *value_initial_vals = NULL;

/* the byte count of the key passed to the hash table, which is the
 * length of the "key" and any "distinct" fields  */
size_t hash_key_bytes = 0;

/* the byte length of the value used by the hash table, which has a
 * minimum length of 1  */
size_t hash_value_bytes = 0;

/* the byte length of the key-value pair in the hash table, which is
 * comprised of the key and the value */
size_t hash_node_bytes = 0;

/* the byte length of the value used by the heap */
size_t heap_value_bytes = 0;

/* the byte length of the key-value pair in the heap */
size_t heap_node_bytes = 0;

/* to convert the key fields (as an rwRec) to ascii */
rwAsciiStream_t *ascii_str;

/* the real output */
app_output_t output;

/* flags set by the user options */
app_flags_t app_flags;

/* how to print timestamps */
uint32_t time_flags = SKTIMESTAMP_NOMSEC;

/* how to handle IPv6 flows */
sk_ipv6policy_t ipv6_policy = SK_IPV6POLICY_MIX;

/* CIDR block mask for src and dest ips.  If 0, use all bits;
 * otherwise, the IP address should be bitwise ANDed with this
 * value. */
uint32_t cidr_sip = 0;
uint32_t cidr_dip = 0;


/* LOCAL VARIABLES */

/* whether we encountered an error counting distinct IP addresses;
 * set (once) by the MEMCHECK_IPSET_ADD() macro */
static int distinct_err = 0;

/* number of records read */
static uint64_t record_count = 0;

/* Summation of whatever value (bytes, packets, flows) we are using.
 * When counting flows, this will be equal to record_count. */
static uint64_t value_total = 0;

/* scratch space for a node */
static uint8_t scratch[MAX_HASH_NODE_BYTES];


/* FUNCTION DEFINITIONS */

/*
 *  topnPrintHeader();
 *
 *    Print the report header to 'output.fp':
 *
 *    -- an INPUT line giving the number of records read, the number
 *       of bins ('limit.entries'), and---when it is non-zero---the
 *       total of the first value field;
 *    -- an OUTPUT line describing the limit in effect (a count, a
 *       threshold, or a percentage);
 *    -- the titles for the key columns, the value columns, and
 *       (unless disabled) the two percentage columns.
 *
 *    When --no-titles was requested ('app_flags.no_titles'), return
 *    immediately without printing anything; in particular, no
 *    message is printed when no bins met the threshold.  When no
 *    bins meet the limit, the OUTPUT line says so and the column
 *    titles are not printed.
 */
static void topnPrintHeader(void)
{
    char buf[VALUE_OUT_BUF_SIZE+2];
    const char *direction_name = "";
    const char *above_below = "";
    size_t i;

    /* with no titles, none of the header is printed */
    if (app_flags.no_titles) {
        return;
    }

    /* choose the wording that matches the direction */
    switch (direction) {
      case RWSTATS_DIR_TOP:
        direction_name = "Top";
        above_below = "above";
        break;
      case RWSTATS_DIR_BOTTOM:
        direction_name = "Bottom";
        above_below = "below";
        break;
    }

    /* Report the number of records read and the number of bins */
    fprintf(output.fp, ("INPUT: %" PRIu64 " Record%s for %" PRIu64 " Bin%s"),
            record_count, PLURAL(record_count),
            limit.entries, PLURAL(limit.entries));
    if (value_total) {
        fprintf(output.fp, (" and %" PRIu64 " Total %s"),
                value_total, value_fields[0].vf_title);
    }
    fprintf(output.fp, "\n");

    /* handle the no data case */
    if (limit.value[RWSTATS_COUNT] < 1) {
        switch (limit.type) {
          case RWSTATS_COUNT:
            /* a count of 0 should have been rejected earlier */
            skAppPrintErr("User was allowed to enter count of 0");
            skAbortBadCase(limit.type);
            /* NOTREACHED: skAbortBadCase() is expected not to
             * return, so there is no 'break' here */

          case RWSTATS_THRESHOLD:
            fprintf(output.fp,
                    ("OUTPUT: No bins %s threshold of %" PRIu64 " %s\n"),
                    above_below, limit.value[RWSTATS_THRESHOLD],
                    value_fields[0].vf_title);
            break;

          case RWSTATS_PERCENTAGE:
            fprintf(output.fp, ("OUTPUT: No bins %s threshold of %"
                                PRIu64 "%% (%" PRIu64 " %s)\n"),
                    above_below, limit.value[RWSTATS_PERCENTAGE],
                    limit.value[RWSTATS_THRESHOLD], value_fields[0].vf_title);
            break;
        }
        /* there are no rows, so do not print the column titles */
        return;
    }

    /* describe the limit that produced the rows that follow */
    switch (limit.type) {
      case RWSTATS_COUNT:
        fprintf(output.fp, ("OUTPUT: %s %" PRIu64 " Bin%s by %s\n"),
                direction_name, limit.value[RWSTATS_COUNT],
                PLURAL(limit.value[RWSTATS_COUNT]),
                value_fields[0].vf_title);
        break;

      case RWSTATS_THRESHOLD:
        fprintf(output.fp, ("OUTPUT: %s %" PRIu64 " bins by %s"
                            " (threshold %" PRIu64 ")\n"),
                direction_name, limit.value[RWSTATS_COUNT],
                value_fields[0].vf_title, limit.value[RWSTATS_THRESHOLD]);
        break;

      case RWSTATS_PERCENTAGE:
        fprintf(output.fp, ("OUTPUT: %s %" PRIu64 " bins by %s"
                            " (%" PRIu64 "%% == %" PRIu64 ")\n"),
                direction_name, limit.value[RWSTATS_COUNT],
                value_fields[0].vf_title, limit.value[RWSTATS_PERCENTAGE],
                limit.value[RWSTATS_THRESHOLD]);
        break;
    }

    /* print key titles */
    rwAsciiPrintTitles(ascii_str);

    /* print value titles; in columnar mode each title is padded or
     * truncated to exactly the field's text length */
    for (i = 0; i < value_num_fields; ++i) {
        if (app_flags.no_columns) {
            fprintf(output.fp, "%c%s",
                    delimiter, value_fields[i].vf_title);
        } else {
            fprintf(output.fp, "%c%*.*s",
                    delimiter,
                    value_fields[i].vf_text_len, value_fields[i].vf_text_len,
                    value_fields[i].vf_title);
        }
    }

    /* print the titles for the percentage and cumulative percentage
     * columns; the first is the first value field's title with a
     * leading '%' */
    if (!app_flags.no_percents) {
        snprintf(buf, sizeof(buf), "%%%s", value_fields[0].vf_title);
        buf[sizeof(buf)-1] = '\0';

        if (app_flags.no_columns) {
            fprintf(output.fp, "%c%s%c%s",
                    delimiter, buf, delimiter, "cumul_%");
        } else {
            fprintf(output.fp, ("%c%*.*s%c%*.*s"),
                    delimiter, width[WIDTH_PCT], width[WIDTH_PCT], buf,
                    delimiter, width[WIDTH_PCT], width[WIDTH_PCT], "cumul_%");
        }
    }

    fprintf(output.fp, "%s\n", final_delim);
}


/*
 *  writeAsciiRecord(key, values);
 *
 *    Unpacks the key fields from 'key' and the value fields from
 *    'values'.  Prints the key fields and the value fields to the
 *    global output stream 'output.fp'.
 */
static void writeAsciiRecord(
    uint8_t        *key,
    const uint8_t  *values)
{
    size_t i;
    char buf[VALUE_OUT_BUF_SIZE];
    rwRec rwrec;
    skplugin_err_t err;
#if RWSTATS_USE_MEMCPY
    uint64_t val64;
    uint32_t val32;
#endif
#if  SK_ENABLE_IPV6
    int added_ip = 0;
#endif

    /* in mixed IPv4/IPv6 setting, keep record as IPv4 unless an IPv6
     * address forces us to use IPv6. */
#define MAYBE_KEY_TO_REC_IPV6(func_v6, func_v4, rec, keyarray, idx)     \
    if (rwRecIsIPv6(rec)) {                                             \
        /* record is already IPv6 */                                    \
        func_v6((rec), KEY_FIELD_PTR(keyarray, idx));                   \
        break;                                                          \
    } else if (ipv6_policy >= SK_IPV6POLICY_MIX) {                      \
        /* else record is IPv4, and IP is encoded as IPv6 */            \
        if (SK_IPV6_IS_V4INV6(KEY_FIELD_PTR(keyarray, idx))) {          \
            /* record is IPv4 and so is the IP */                       \
            uint8_t *ipv4 = KEY_FIELD_PTR(keyarray, idx) + 12;          \
            uint32_t t32;                                               \
            memcpy(&t32, ipv4, sizeof(t32));                            \
            func_v4((rec), htonl(t32));                                 \
            added_ip = 1;                                               \
            break;                                                      \
        }                                                               \
        /* record is IPv4 and address is IPv6 */                        \
        if (added_ip) {                                                 \
            /* IPv4 addresses exist on record; must convert */          \
            rwRecConvertToIPv6(rec);                                    \
        } else {                                                        \
            /* no addresses on record yet; don't need to                \
             * convert */                                               \
            rwRecSetIPv6(rec);                                          \
        }                                                               \
        func_v6((rec), KEY_FIELD_PTR(keyarray, idx));                   \
        break;                                                          \
    }

    /* Zero out rwrec to avoid display errors---specifically with msec
     * fields and eTime. */
    RWREC_CLEAR(&rwrec);

    /* Initialize the protocol to 1 (ICMP), so that if the user has
     * requested ICMP type/code but the protocol is not part of the
     * key, we still get ICMP values. */
    rwRecSetProto(&rwrec, IPPROTO_ICMP);

#if SK_ENABLE_IPV6
    if (ipv6_policy > SK_IPV6POLICY_MIX) {
        /* Force records to be in IPv6 format */
        rwRecSetIPv6(&rwrec);
    }
#endif /* SK_ENABLE_IPV6 */

    /* unpack the key into 'rwrec' */
    if (app_flags.presorted_input) {
        /* decode a key that is in network byte-order */
        for (i = 0; i < key_num_fields; ++i) {
            switch (key_fields[i].kf_id) {
              case RWREC_FIELD_SIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetSIPv6, rwRecSetSIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NETWORK_32(rwRecSetSIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_DIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetDIPv6, rwRecSetDIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NETWORK_32(rwRecSetDIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_NHIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetNhIPv6, rwRecSetNhIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NETWORK_32(rwRecSetNhIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_SPORT:
                KEY_TO_REC_NETWORK_16(rwRecSetSPort, &rwrec, key, i);
                break;

              case RWREC_FIELD_DPORT:
              case RWREC_FIELD_ICMP_TYPE_CODE:
                KEY_TO_REC_NETWORK_16(rwRecSetDPort, &rwrec, key, i);
                break;

              case RWREC_FIELD_PROTO:
                KEY_TO_REC_NETWORK_08(rwRecSetProto, &rwrec, key, i);
                break;

              case RWREC_FIELD_PKTS:
                KEY_TO_REC_NETWORK_32(rwRecSetPkts, &rwrec, key, i);
                break;

              case RWREC_FIELD_BYTES:
                KEY_TO_REC_NETWORK_32(rwRecSetBytes, &rwrec, key, i);
                break;

              case RWREC_FIELD_FLAGS:
                KEY_TO_REC_NETWORK_08(rwRecSetFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_SID:
                KEY_TO_REC_NETWORK_16(rwRecSetSensor, &rwrec, key, i);
                break;

              case RWREC_FIELD_INPUT:
                KEY_TO_REC_NETWORK_16(rwRecSetInput, &rwrec, key, i);
                break;

              case RWREC_FIELD_OUTPUT:
                KEY_TO_REC_NETWORK_16(rwRecSetOutput, &rwrec, key, i);
                break;

              case RWREC_FIELD_INIT_FLAGS:
                KEY_TO_REC_NETWORK_08(rwRecSetInitFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_REST_FLAGS:
                KEY_TO_REC_NETWORK_08(rwRecSetRestFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_TCP_STATE:
                KEY_TO_REC_NETWORK_08(rwRecSetTcpState, &rwrec, key, i);
                break;

              case RWREC_FIELD_APPLICATION:
                KEY_TO_REC_NETWORK_16(rwRecSetApplication, &rwrec, key, i);
                break;

              case RWREC_FIELD_FTYPE_CLASS:
              case RWREC_FIELD_FTYPE_TYPE:
                KEY_TO_REC_NETWORK_08(rwRecSetFlowType, &rwrec, key, i);
                break;

              case RWREC_FIELD_STIME:
              case RWREC_FIELD_STIME_MSEC:
              case RWREC_FIELD_ELAPSED:
              case RWREC_FIELD_ELAPSED_MSEC:
              case RWREC_FIELD_ETIME:
              case RWREC_FIELD_ETIME_MSEC:
                key_fields[i].kf_k2r(&rwrec, KEY_FIELD_PTR(key, i));
                break;

              default:
                assert(key_fields[i].kf_field_handle);
                err = skPluginFieldRunBinToTextFn(
                    key_fields[i].kf_field_handle,
                    key_fields[i].kf_textbuf,
                    sizeof(key_fields[i].kf_textbuf),
                    KEY_FIELD_PTR(key, i));
                if (err != SKPLUGIN_OK) {
                    const char **name;
                    skPluginFieldName(key_fields[i].kf_field_handle, &name);
                    skAppPrintErr(("Plugin-based field %s failed "
                                   "converting to text with error code %d"),
                                  name[0], err);
                    appExit(EXIT_FAILURE);
                }
                break;
            }
        }
    } else {
        /* decode a key that is in native byte-order */
        for (i = 0; i < key_num_fields; ++i) {
            switch (key_fields[i].kf_id) {
              case RWREC_FIELD_SIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetSIPv6, rwRecSetSIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NATIVE_32(rwRecSetSIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_DIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetDIPv6, rwRecSetDIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NATIVE_32(rwRecSetDIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_NHIP:
#if SK_ENABLE_IPV6
                MAYBE_KEY_TO_REC_IPV6(rwRecMemSetNhIPv6, rwRecSetNhIPv4,
                                      &rwrec, key, i);
#endif  /* SK_ENABLE_IPV6 */
                KEY_TO_REC_NATIVE_32(rwRecSetNhIPv4, &rwrec, key, i);
                break;

              case RWREC_FIELD_SPORT:
                KEY_TO_REC_NATIVE_16(rwRecSetSPort, &rwrec, key, i);
                break;

              case RWREC_FIELD_DPORT:
              case RWREC_FIELD_ICMP_TYPE_CODE:
                KEY_TO_REC_NATIVE_16(rwRecSetDPort, &rwrec, key, i);
                break;

              case RWREC_FIELD_PROTO:
                KEY_TO_REC_NATIVE_08(rwRecSetProto, &rwrec, key, i);
                break;

              case RWREC_FIELD_PKTS:
                KEY_TO_REC_NATIVE_32(rwRecSetPkts, &rwrec, key, i);
                break;

              case RWREC_FIELD_BYTES:
                KEY_TO_REC_NATIVE_32(rwRecSetBytes, &rwrec, key, i);
                break;

              case RWREC_FIELD_FLAGS:
                KEY_TO_REC_NATIVE_08(rwRecSetFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_SID:
                KEY_TO_REC_NATIVE_16(rwRecSetSensor, &rwrec, key, i);
                break;

              case RWREC_FIELD_INPUT:
                KEY_TO_REC_NATIVE_16(rwRecSetInput, &rwrec, key, i);
                break;

              case RWREC_FIELD_OUTPUT:
                KEY_TO_REC_NATIVE_16(rwRecSetOutput, &rwrec, key, i);
                break;

              case RWREC_FIELD_INIT_FLAGS:
                KEY_TO_REC_NATIVE_08(rwRecSetInitFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_REST_FLAGS:
                KEY_TO_REC_NATIVE_08(rwRecSetRestFlags, &rwrec, key, i);
                break;

              case RWREC_FIELD_TCP_STATE:
                KEY_TO_REC_NATIVE_08(rwRecSetTcpState, &rwrec, key, i);
                break;

              case RWREC_FIELD_APPLICATION:
                KEY_TO_REC_NATIVE_16(rwRecSetApplication, &rwrec, key, i);
                break;

              case RWREC_FIELD_FTYPE_CLASS:
              case RWREC_FIELD_FTYPE_TYPE:
                KEY_TO_REC_NATIVE_08(rwRecSetFlowType, &rwrec, key, i);
                break;

              case RWREC_FIELD_STIME:
              case RWREC_FIELD_STIME_MSEC:
              case RWREC_FIELD_ELAPSED:
              case RWREC_FIELD_ELAPSED_MSEC:
              case RWREC_FIELD_ETIME:
              case RWREC_FIELD_ETIME_MSEC:
                key_fields[i].kf_k2r(&rwrec, KEY_FIELD_PTR(key, i));
                break;

              default:
                assert(key_fields[i].kf_field_handle);
                err = skPluginFieldRunBinToTextFn(
                    key_fields[i].kf_field_handle,
                    key_fields[i].kf_textbuf,
                    sizeof(key_fields[i].kf_textbuf),
                    KEY_FIELD_PTR(key, i));
                if (err != SKPLUGIN_OK) {
                    const char **name;
                    skPluginFieldName(key_fields[i].kf_field_handle, &name);
                    skAppPrintErr(("Plugin-based field %s failed "
                                   "converting to text with error code %d"),
                                  name[0], err);
                    appExit(EXIT_FAILURE);
                }
                break;
            }
        }
    }

    /* print key fields */
    rwAsciiPrintRec(ascii_str, &rwrec);

    /* print value fields */
    for (i = 0; i < value_num_fields; ++i) {
        switch (value_fields[i].vf_id) {
          case VALUE_PLUGIN:
            err = skPluginFieldRunBinToTextFn(value_fields[i].vf_plugin_field,
                                              buf, sizeof(buf),
                                              VAL_FIELD_GET_PTR(values, i));
            if (err != SKPLUGIN_OK) {
                const char **name;
                skPluginFieldName(value_fields[i].vf_plugin_field, &name);
                skAppPrintErr(("Plugin-based field %s failed "
                               "converting to text with error code %d"),
                              name[0], err);
                appExit(EXIT_FAILURE);
            }
            break;

#if  RWSTATS_USE_MEMCPY
          case VALUE_BYTES:
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            VAL_FIELD_MEMGET(values, i, &val64);
            sprintf(buf, ("%" PRIu64), val64);
            break;

          case VALUE_PACKETS:
          case VALUE_FLOWS:
            VAL_FIELD_MEMGET(values, i, &val32);
            sprintf(buf, ("%" PRIu32), val32);
            break;

#else
          case VALUE_BYTES:
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            sprintf(buf, ("%" PRIu64), *VAL_FIELD_GET_PTR_U64(values, i));
            break;

          case VALUE_PACKETS:
          case VALUE_FLOWS:
            sprintf(buf, ("%" PRIu32), *VAL_FIELD_GET_PTR_U32(values, i));
            break;
#endif  /* RWSTATS_USE_MEMCPY */
        }

        if (app_flags.no_columns) {
            fprintf(output.fp, "%c%s", delimiter, buf);
        } else {
            fprintf(output.fp, "%c%*s",
                    delimiter, value_fields[i].vf_text_len, buf);
        }
    }
}


/*
 *  rwstatsPrintHeap(heap);
 *
 *    Print the header, sort the entries in 'heap', and then write one
 *    output row per heap entry.  Unless percentages are disabled,
 *    each row is followed by the entry's share of the global
 *    'value_total' and the running sum of those shares.  When the
 *    primary value field is not bytes, packets, or flows, a '?' is
 *    printed in place of each percentage.
 */
static void rwstatsPrintHeap(skheap_t *heap)
{
    skheapiterator_t *iter;
    uint8_t *node;
    double running_pct = 0.0;
    double share;
    uint64_t count64;
    uint32_t count32;

    /* titles first, then put the heap's entries in order */
    topnPrintHeader();
    skHeapSortEntries(heap);

    for (iter = skHeapIteratorCreate(heap, -1);
         skHeapIteratorNext(iter, (skheapnode_t*)&node)
             != HEAP_NO_MORE_ENTRIES;
         /* iterator advances in the condition */)
    {
        /* key columns and value columns for this entry */
        writeAsciiRecord(HEAP_NODE_KEY_PTR(node), HEAP_NODE_VAL_PTR(node));

        if (app_flags.no_percents) {
            /* row ends here when percentages are suppressed */
            fprintf(output.fp, "%s\n", final_delim);
            continue;
        }

        /* the primary value is read from the start of the entry's
         * value area; its width depends on the value type */
        if (VALUE_BYTES == value_fields[0].vf_id) {
            memcpy(&count64, HEAP_NODE_VAL_PTR(node), sizeof(uint64_t));
            share = 100.0 * (double)count64 / value_total;
        } else if (VALUE_PACKETS == value_fields[0].vf_id
                   || VALUE_FLOWS == value_fields[0].vf_id)
        {
            memcpy(&count32, HEAP_NODE_VAL_PTR(node), sizeof(count32));
            share = 100.0 * (double)count32 / value_total;
        } else {
            /* no percentage is computed for other value types */
            fprintf(output.fp, ("%c%*c%c%*c%s\n"),
                    delimiter, width[WIDTH_PCT], '?', delimiter,
                    width[WIDTH_PCT], '?', final_delim);
            continue;
        }

        running_pct += share;
        fprintf(output.fp, ("%c%*.6f%c%*.6f%s\n"),
                delimiter, width[WIDTH_PCT], share, delimiter,
                width[WIDTH_PCT], running_pct, final_delim);
    }

    skHeapIteratorFree(iter);
}


/*
 *  addRecToValues(rwrec, merged);
 *
 *    Fold the flow record 'rwrec' into the packed value array
 *    'merged'.  The layout of 'merged' is given by the global
 *    value_fields[]; every field listed there is updated, except the
 *    distinct-IP fields, which are left untouched here.
 *
 *    As a side effect, the global 'value_total' is increased by the
 *    record's contribution to the first value field when that field
 *    is bytes, packets, or flows.
 *
 *    A failure reported by a plugin field prints an error and exits
 *    the application.
 */
static void addRecToValues(
    const rwRec    *rwrec,
    uint8_t        *merged)
{
#if  RWSTATS_USE_MEMCPY
    uint64_t sum64;
    uint32_t sum32;
#endif
    size_t fld;
    skplugin_err_t plugin_rv;

    /* keep the grand total for the primary value up to date */
    if (VALUE_BYTES == value_fields[0].vf_id) {
        value_total += rwRecGetBytes(rwrec);
    } else if (VALUE_PACKETS == value_fields[0].vf_id) {
        value_total += rwRecGetPkts(rwrec);
    } else if (VALUE_FLOWS == value_fields[0].vf_id) {
        ++value_total;
    }

    for (fld = 0; fld < value_num_fields; ++fld) {
        switch (value_fields[fld].vf_id) {
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            /* distinct counts are not maintained here */
            break;

#if  RWSTATS_USE_MEMCPY
          case VALUE_FLOWS:
            /* copy out, bump, copy back */
            VAL_FIELD_MEMGET(merged, fld, &sum32);
            ++sum32;
            VAL_FIELD_MEMSET(merged, fld, &sum32);
            break;

          case VALUE_PACKETS:
            VAL_FIELD_MEMGET(merged, fld, &sum32);
            sum32 += rwRecGetPkts(rwrec);
            VAL_FIELD_MEMSET(merged, fld, &sum32);
            break;

          case VALUE_BYTES:
            VAL_FIELD_MEMGET(merged, fld, &sum64);
            sum64 += rwRecGetBytes(rwrec);
            VAL_FIELD_MEMSET(merged, fld, &sum64);
            break;
#else
          case VALUE_FLOWS:
            /* update the counter in place */
            *VAL_FIELD_GET_PTR_U32(merged, fld) += 1;
            break;

          case VALUE_PACKETS:
            *VAL_FIELD_GET_PTR_U32(merged, fld) += rwRecGetPkts(rwrec);
            break;

          case VALUE_BYTES:
            *VAL_FIELD_GET_PTR_U64(merged, fld) += rwRecGetBytes(rwrec);
            break;
#endif  /* RWSTATS_USE_MEMCPY */

          case VALUE_PLUGIN:
            /* let the plugin update its own slot from the record */
            plugin_rv = skPluginFieldRunRecToBinFn(
                value_fields[fld].vf_plugin_field,
                VAL_FIELD_GET_PTR(merged, fld),
                rwrec, NULL);
            if (SKPLUGIN_OK == plugin_rv) {
                break;
            }
            {
                const char **field_name;
                skPluginFieldName(value_fields[fld].vf_plugin_field,
                                  &field_name);
                skAppPrintErr(("Plugin-based field %s failed "
                               "converting to binary with error code %d"),
                              field_name[0], plugin_rv);
                appExit(EXIT_FAILURE);
            }
            break;
        }
    }
}


/*
 *  mergeValues(merged, addend);
 *
 *    Combine two packed value arrays.  For every value field named
 *    in the global value_fields[], the amount stored in 'addend' is
 *    added to the amount stored in 'merged'; 'addend' is only read.
 *    Distinct-IP fields are skipped, and plugin fields are combined
 *    by the plugin's own merge function.
 *
 *    A failure reported by a plugin field prints an error and exits
 *    the application.
 */
static void mergeValues(
    uint8_t        *merged,
    const uint8_t  *addend)
{
#if  RWSTATS_USE_MEMCPY
    uint64_t total64;
    uint64_t extra64;
    uint32_t total32;
    uint32_t extra32;
#endif
    skplugin_err_t plugin_rv;
    size_t fld;

    for (fld = 0; fld < value_num_fields; ++fld) {
        switch (value_fields[fld].vf_id) {
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            /* distinct counts are not merged here */
            break;

#if  RWSTATS_USE_MEMCPY
          case VALUE_PACKETS:
          case VALUE_FLOWS:
            /* 32-bit counters: copy both out, add, copy back */
            VAL_FIELD_MEMGET(addend, fld, &extra32);
            VAL_FIELD_MEMGET(merged, fld, &total32);
            total32 += extra32;
            VAL_FIELD_MEMSET(merged, fld, &total32);
            break;

          case VALUE_BYTES:
            /* 64-bit counter */
            VAL_FIELD_MEMGET(addend, fld, &extra64);
            VAL_FIELD_MEMGET(merged, fld, &total64);
            total64 += extra64;
            VAL_FIELD_MEMSET(merged, fld, &total64);
            break;
#else  /* RWSTATS_USE_MEMCPY */
          case VALUE_PACKETS:
          case VALUE_FLOWS:
            /* 32-bit counters, updated in place */
            *VAL_FIELD_GET_PTR_U32(merged, fld)
                += *VAL_FIELD_GET_PTR_U32(addend, fld);
            break;

          case VALUE_BYTES:
            /* 64-bit counter, updated in place */
            *VAL_FIELD_GET_PTR_U64(merged, fld)
                += *VAL_FIELD_GET_PTR_U64(addend, fld);
            break;
#endif  /* RWSTATS_USE_MEMCPY */

          case VALUE_PLUGIN:
            /* the plugin knows how to combine its own binary values */
            plugin_rv = skPluginFieldRunBinMergeFn(
                value_fields[fld].vf_plugin_field,
                VAL_FIELD_GET_PTR(merged, fld),
                VAL_FIELD_GET_PTR(addend, fld));
            if (SKPLUGIN_OK == plugin_rv) {
                break;
            }
            {
                const char **field_name;
                skPluginFieldName(value_fields[fld].vf_plugin_field,
                                  &field_name);
                skAppPrintErr(("Plugin-based field %s failed "
                               "merging binary values with error code %d"),
                              field_name[0], plugin_rv);
                appExit(EXIT_FAILURE);
            }
            break;
        }
    }
}


/*
 *  status = fillRecordAndKey(rwios, rwrec, key);
 *
 *    Reads a flow record from 'rwios' into the memory pointed at by
 *    'rwrec', computes the key based on the global key_fields[]
 *    settings, and fills in the parameter 'key' with that value.
 *    Return 1 if a record was read, or 0 if it was not.
 *
 *    When the read fails for a reason other than end-of-file, the
 *    stream error is printed before 0 is returned.  Each successful
 *    read increments the global 'record_count'.
 *
 *    If the globals 'cidr_sip' or 'cidr_dip' are non-zero, the
 *    corresponding IPv4 address in 'rwrec' is masked in place before
 *    the key is built, so the caller sees the masked record.
 *
 *    The key is written in network byte-order when
 *    'app_flags.presorted_input' is set and in native byte-order
 *    otherwise.  A failure reported by a plugin-based key field
 *    prints an error and exits the application.
 */
static int fillRecordAndKey(
    skstream_t *rwios,
    rwRec      *rwrec,
    uint8_t    *key)
{
    skplugin_err_t err;
    size_t i;
    int rv;

    /* get next record */
    rv = skStreamReadRecord(rwios, rwrec);
    if (rv) {
        /* end-of-file is not reported; any other failure is printed */
        if (rv != SKSTREAM_ERR_EOF) {
            skStreamPrintLastErr(rwios, rv, &skAppPrintErr);
        }
        return 0;
    }
    ++record_count;

    /* apply the optional address masks to the record itself.
     * NOTE(review): the masks go through the IPv4 accessors only;
     * confirm the intended behavior for records holding IPv6
     * addresses. */
    if (cidr_sip) {
        rwRecSetSIPv4(rwrec, rwRecGetSIPv4(rwrec) & cidr_sip);
    }
    if (cidr_dip) {
        rwRecSetDIPv4(rwrec, rwRecGetDIPv4(rwrec) & cidr_dip);
    }

    if (app_flags.presorted_input) {
        /* create key in network byte-order */
        for (i = 0; i < key_num_fields; ++i) {
            switch (key_fields[i].kf_id) {
              case RWREC_FIELD_SIP:
#if SK_ENABLE_IPV6
                /* with an IPv6 policy of MIX or higher the address is
                 * stored through the IPv6 accessor; otherwise control
                 * continues to the 32-bit IPv4 form below */
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetSIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NETWORK_32(rwRecGetSIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_DIP:
#if SK_ENABLE_IPV6
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetDIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NETWORK_32(rwRecGetDIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_NHIP:
#if SK_ENABLE_IPV6
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetNhIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NETWORK_32(rwRecGetNhIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_SPORT:
                REC_TO_KEY_NETWORK_16(rwRecGetSPort, rwrec, key, i);
                break;

              /* the ICMP type/code key is read through the
               * destination-port accessor */
              case RWREC_FIELD_DPORT:
              case RWREC_FIELD_ICMP_TYPE_CODE:
                REC_TO_KEY_NETWORK_16(rwRecGetDPort, rwrec, key, i);
                break;

              case RWREC_FIELD_PROTO:
                REC_TO_KEY_NETWORK_08(rwRecGetProto, rwrec, key, i);
                break;

              case RWREC_FIELD_PKTS:
                REC_TO_KEY_NETWORK_32(rwRecGetPkts, rwrec, key, i);
                break;

              case RWREC_FIELD_BYTES:
                REC_TO_KEY_NETWORK_32(rwRecGetBytes, rwrec, key, i);
                break;

              case RWREC_FIELD_FLAGS:
                REC_TO_KEY_NETWORK_08(rwRecGetFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_SID:
                REC_TO_KEY_NETWORK_16(rwRecGetSensor, rwrec, key, i);
                break;

              case RWREC_FIELD_INPUT:
                REC_TO_KEY_NETWORK_16(rwRecGetInput, rwrec, key, i);
                break;

              case RWREC_FIELD_OUTPUT:
                REC_TO_KEY_NETWORK_16(rwRecGetOutput, rwrec, key, i);
                break;

              case RWREC_FIELD_INIT_FLAGS:
                REC_TO_KEY_NETWORK_08(rwRecGetInitFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_REST_FLAGS:
                REC_TO_KEY_NETWORK_08(rwRecGetRestFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_TCP_STATE:
                REC_TO_KEY_NETWORK_08(rwRecGetTcpState, rwrec, key, i);
                break;

              case RWREC_FIELD_APPLICATION:
                REC_TO_KEY_NETWORK_16(rwRecGetApplication, rwrec, key, i);
                break;

              /* class and type are both keyed on the record's
               * flow-type value */
              case RWREC_FIELD_FTYPE_CLASS:
              case RWREC_FIELD_FTYPE_TYPE:
                REC_TO_KEY_NETWORK_08(rwRecGetFlowType, rwrec, key, i);
                break;

              /* time-based fields are written by the field's own
               * record-to-key callback */
              case RWREC_FIELD_STIME:
              case RWREC_FIELD_STIME_MSEC:
              case RWREC_FIELD_ELAPSED:
              case RWREC_FIELD_ELAPSED_MSEC:
              case RWREC_FIELD_ETIME:
              case RWREC_FIELD_ETIME_MSEC:
                key_fields[i].kf_r2k(rwrec, KEY_FIELD_PTR(key, i));
                break;

              /* any other id is expected to be a plugin-based field
               * with a valid handle */
              default:
                assert(key_fields[i].kf_field_handle);
                err = skPluginFieldRunRecToBinFn(key_fields[i].kf_field_handle,
                                                 KEY_FIELD_PTR(key, i), rwrec,
                                                 NULL);
                if (err != SKPLUGIN_OK) {
                    const char **name;
                    skPluginFieldName(key_fields[i].kf_field_handle, &name);
                    skAppPrintErr(("Plugin-based field %s failed "
                                   "converting to binary with error code %d"),
                                  name[0], err);
                    appExit(EXIT_FAILURE);
                }
                break;
            }
        }
    } else {
        /* key can be in native byte-order */
        /* this loop mirrors the one above, field for field, using the
         * NATIVE conversion macros in place of the NETWORK ones */
        for (i = 0; i < key_num_fields; ++i) {
            switch (key_fields[i].kf_id) {
              case RWREC_FIELD_SIP:
#if SK_ENABLE_IPV6
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetSIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NATIVE_32(rwRecGetSIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_DIP:
#if SK_ENABLE_IPV6
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetDIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NATIVE_32(rwRecGetDIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_NHIP:
#if SK_ENABLE_IPV6
                if (ipv6_policy >= SK_IPV6POLICY_MIX) {
                    rwRecMemGetNhIPv6(rwrec, KEY_FIELD_PTR(key, i));
                    break;
                }
#endif  /* SK_ENABLE_IPV6 */
                REC_TO_KEY_NATIVE_32(rwRecGetNhIPv4, rwrec, key, i);
                break;

              case RWREC_FIELD_SPORT:
                REC_TO_KEY_NATIVE_16(rwRecGetSPort, rwrec, key, i);
                break;

              case RWREC_FIELD_DPORT:
              case RWREC_FIELD_ICMP_TYPE_CODE:
                REC_TO_KEY_NATIVE_16(rwRecGetDPort, rwrec, key, i);
                break;

              case RWREC_FIELD_PROTO:
                REC_TO_KEY_NATIVE_08(rwRecGetProto, rwrec, key, i);
                break;

              case RWREC_FIELD_PKTS:
                REC_TO_KEY_NATIVE_32(rwRecGetPkts, rwrec, key, i);
                break;

              case RWREC_FIELD_BYTES:
                REC_TO_KEY_NATIVE_32(rwRecGetBytes, rwrec, key, i);
                break;

              case RWREC_FIELD_FLAGS:
                REC_TO_KEY_NATIVE_08(rwRecGetFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_SID:
                REC_TO_KEY_NATIVE_16(rwRecGetSensor, rwrec, key, i);
                break;

              case RWREC_FIELD_INPUT:
                REC_TO_KEY_NATIVE_16(rwRecGetInput, rwrec, key, i);
                break;

              case RWREC_FIELD_OUTPUT:
                REC_TO_KEY_NATIVE_16(rwRecGetOutput, rwrec, key, i);
                break;

              case RWREC_FIELD_INIT_FLAGS:
                REC_TO_KEY_NATIVE_08(rwRecGetInitFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_REST_FLAGS:
                REC_TO_KEY_NATIVE_08(rwRecGetRestFlags, rwrec, key, i);
                break;

              case RWREC_FIELD_TCP_STATE:
                REC_TO_KEY_NATIVE_08(rwRecGetTcpState, rwrec, key, i);
                break;

              case RWREC_FIELD_APPLICATION:
                REC_TO_KEY_NATIVE_16(rwRecGetApplication, rwrec, key, i);
                break;

              case RWREC_FIELD_FTYPE_CLASS:
              case RWREC_FIELD_FTYPE_TYPE:
                REC_TO_KEY_NATIVE_08(rwRecGetFlowType, rwrec, key, i);
                break;

              case RWREC_FIELD_STIME:
              case RWREC_FIELD_STIME_MSEC:
              case RWREC_FIELD_ELAPSED:
              case RWREC_FIELD_ELAPSED_MSEC:
              case RWREC_FIELD_ETIME:
              case RWREC_FIELD_ETIME_MSEC:
                key_fields[i].kf_r2k(rwrec, KEY_FIELD_PTR(key, i));
                break;

              default:
                assert(key_fields[i].kf_field_handle);
                err = skPluginFieldRunRecToBinFn(key_fields[i].kf_field_handle,
                                                 KEY_FIELD_PTR(key, i), rwrec,
                                                 NULL);
                if (err != SKPLUGIN_OK) {
                    const char **name;
                    skPluginFieldName(key_fields[i].kf_field_handle, &name);
                    skAppPrintErr(("Plugin-based field %s failed "
                                   "converting to binary with error code %d"),
                                  name[0], err);
                    appExit(EXIT_FAILURE);
                }
                break;
            }
        }
    }

    return 1;
}


/*
 *  status = fillRecordAndKeyForHash(rwrec, key);
 *
 *    Fetch the next flow record from the application's inputs into
 *    'rwrec' and build its key in 'key'.  The key holds the fields
 *    from the global key_fields[] followed by each distinct field,
 *    written at that field's offset as given by distinct_fields[].
 *
 *    The currently open stream is remembered between calls.  When it
 *    yields no record, it is destroyed and the next input is opened
 *    via appNextInput().
 *
 *    Return 1 when a record and key were produced, or 0 when
 *    appNextInput() returns 1.  Any other non-zero result from
 *    appNextInput() causes the application to exit.
 */
static int fillRecordAndKeyForHash(
    rwRec      *rwrec,
    uint8_t    *key)
{
    static skstream_t *rwios = NULL;
    size_t fld;
    int next_rv;

    for (;;) {
        if (rwios != NULL && fillRecordAndKey(rwios, rwrec, key)) {
            /* append every distinct field to the key */
            fld = 0;
            while (fld < distinct_num_fields) {
                distinct_fields[fld].df_r2k(
                    rwrec, key + distinct_fields[fld].df_offset);
                ++fld;
            }
            return 1;
        }

        /* current stream (if any) gave no record; release it */
        skStreamDestroy(&rwios);

        /* try to open the next input; keep looping while that works */
        next_rv = appNextInput(&rwios);
        if (next_rv != 0) {
            break;
        }
    }

    if (next_rv != 1) {
        /* unexpected error */
        appExit(EXIT_FAILURE);
    }

    /* out of files */
    return 0;
}



/*
 *  rwstatsComputeLimits(ht);
 *
 *    Set 'limit.entries' to the number of entries in the hash table
 *    'ht'.  When the limit type is a count, nothing more is needed.
 *    Otherwise, visit every entry and increment 'limit.value[0]' for
 *    each one whose primary value is at or above (top) or at or
 *    below (bottom) the volume in 'limit.value[1]'.  For a percentage
 *    limit, that volume is first derived from 'value_total' and
 *    'limit.value[2]'.
 *
 *    Only bytes, packets, and flows are accepted as the primary
 *    value here, and there must be no distinct fields.
 *
 *    NOTE(review): limit.value[] appears to be indexed by limit type
 *    (0 count, 1 threshold, 2 percentage); confirm against the
 *    RWSTATS_* definitions.
 */
static void rwstatsComputeLimits(
    const HashTable    *ht)
{
    const int dir_type = DIR_AND_TYPE(direction, value_fields[0].vf_id);
    HASH_ITER cursor;
    uint8_t *entry_key;
    uint8_t *entry_val;
    uint64_t wide;
    uint32_t narrow;
    int meets_limit;

    assert(limit.value[limit.type] > 0);
    assert(0 == distinct_num_fields);

    /* the total number of bins comes straight from the table */
    limit.entries = hashlib_count_entries(ht);

    if (RWSTATS_COUNT == limit.type) {
        /* the user supplied the count directly */
        return;
    }
    if (RWSTATS_PERCENTAGE == limit.type) {
        /* percentage -> volume; volume -> count happens below */
        limit.value[1] = value_total * limit.value[2] / 100;
    }
    /* for RWSTATS_THRESHOLD the volume is already in place */

    /* reject primary value types this function cannot handle */
    if (DISTINCT_SIP == value_fields[0].vf_id
        || DISTINCT_DIP == value_fields[0].vf_id
        || VALUE_PLUGIN == value_fields[0].vf_id)
    {
        skAbortBadCase(value_fields[0].vf_id);
    }

    /* count the entries that satisfy the volume limit */
    for (cursor = hashlib_create_iterator(ht);
         hashlib_iterate(ht, &cursor, &entry_key, &entry_val)
             != ERR_NOMOREENTRIES;
         /* iterator advances in the condition */)
    {
        meets_limit = 0;

        switch (dir_type) {
          case DIR_AND_TYPE(RWSTATS_DIR_BOTTOM, VALUE_PACKETS):
          case DIR_AND_TYPE(RWSTATS_DIR_BOTTOM, VALUE_FLOWS):
            memcpy(&narrow, entry_val, sizeof(uint32_t));
            meets_limit = (narrow <= limit.value[1]);
            break;
          case DIR_AND_TYPE(RWSTATS_DIR_BOTTOM, VALUE_BYTES):
            memcpy(&wide, entry_val, sizeof(uint64_t));
            meets_limit = (wide <= limit.value[1]);
            break;
          case DIR_AND_TYPE(RWSTATS_DIR_TOP, VALUE_PACKETS):
          case DIR_AND_TYPE(RWSTATS_DIR_TOP, VALUE_FLOWS):
            memcpy(&narrow, entry_val, sizeof(uint32_t));
            meets_limit = (narrow >= limit.value[1]);
            break;
          case DIR_AND_TYPE(RWSTATS_DIR_TOP, VALUE_BYTES):
            memcpy(&wide, entry_val, sizeof(uint64_t));
            meets_limit = (wide >= limit.value[1]);
            break;
          default:
            skAbortBadCase(dir_type);
        }

        if (meets_limit) {
            ++limit.value[0];
        }
    }
}


/*
 *  rwstatsComputeLimitsDistinct(ht);
 *
 *    Walk the hash table 'ht', treating each run of consecutive
 *    entries whose first 'key_byte_width' key bytes are equal as one
 *    bin.  Set 'limit.entries' to the number of bins seen.
 *
 *    Unless the limit type is a count, also accumulate a value per
 *    bin and increment 'limit.value[0]' for each bin whose value is
 *    at or above (top) or at or below (bottom) 'limit.value[1]'.
 *    For a percentage limit, 'limit.value[1]' is first derived from
 *    'value_total' and 'limit.value[2]'.
 *
 *    The per-bin value is the sum of the entries' primary values for
 *    bytes, packets, and flows.  For a distinct-IP primary value it
 *    is a count: every entry counts when there is a single distinct
 *    field; otherwise an entry counts only when the IP fetched by
 *    GET_DISTINCT_IP() differs from that of the previous entry.
 *
 *    The global 'scratch' buffer is used to remember the current
 *    bin's key.  If the table is empty, the function returns without
 *    changing anything.
 */
static void rwstatsComputeLimitsDistinct(
    const HashTable    *ht)
{
    HASH_ITER cursor;
    uint8_t *entry_key;
    uint8_t *entry_val;
    uint8_t *bin_key = scratch;
    uint64_t bin_sum = 0;
    uint64_t wide;
    uint32_t narrow;
    uint32_t prev_ip = 0;
    uint32_t this_ip = 0;
    const int count_only = (RWSTATS_COUNT == limit.type);

    assert(limit.value[limit.type] > 0);

    cursor = hashlib_create_iterator(ht);

    /* the first entry seeds the first bin */
    if (ERR_NOMOREENTRIES
        == hashlib_iterate(ht, &cursor, &entry_key, &entry_val))
    {
        /* empty table */
        return;
    }
    limit.entries = 1;

    /* a count limit needs only the number of bins; the other limit
     * types need the first bin's starting value */
    if (RWSTATS_THRESHOLD == limit.type) {
        switch (value_fields[0].vf_id) {
          case VALUE_PACKETS:
          case VALUE_FLOWS:
            memcpy(&narrow, entry_val, sizeof(uint32_t));
            bin_sum = narrow;
            break;
          case VALUE_BYTES:
            memcpy(&wide, entry_val, sizeof(uint64_t));
            bin_sum = wide;
            break;
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            if (distinct_num_fields > 1) {
                GET_DISTINCT_IP(entry_key, 0, &prev_ip);
            }
            bin_sum = 1;
            break;
          case VALUE_PLUGIN:
            skAbortBadCase(value_fields[0].vf_id);
        }
    } else if (RWSTATS_PERCENTAGE == limit.type) {
        /* turn the percentage into a volume before counting */
        limit.value[1] = value_total * limit.value[2] / 100;
        switch (value_fields[0].vf_id) {
          case VALUE_PACKETS:
          case VALUE_FLOWS:
            memcpy(&narrow, entry_val, sizeof(uint32_t));
            bin_sum = narrow;
            break;
          case VALUE_BYTES:
            memcpy(&wide, entry_val, sizeof(uint64_t));
            bin_sum = wide;
            break;
          case DISTINCT_SIP:
          case DISTINCT_DIP:
          case VALUE_PLUGIN:
            skAbortBadCase(value_fields[0].vf_id);
        }
    }

    /* remember which bin we are in */
    memcpy(bin_key, entry_key, key_byte_width);

    while (hashlib_iterate(ht, &cursor, &entry_key, &entry_val)
           != ERR_NOMOREENTRIES)
    {
        if (0 != memcmp(bin_key, entry_key, key_byte_width)) {
            /* a new bin starts with this entry */
            memcpy(bin_key, entry_key, key_byte_width);
            ++limit.entries;

            if (count_only) {
                continue;
            }

            /* the bin that just ended: does it meet the limit? */
            if ((RWSTATS_DIR_TOP == direction)
                ? (bin_sum >= limit.value[1])
                : (bin_sum <= limit.value[1]))
            {
                ++limit.value[0];
            }

            /* start the new bin's value from this entry */
            switch (value_fields[0].vf_id) {
              case VALUE_PACKETS:
              case VALUE_FLOWS:
                memcpy(&narrow, entry_val, sizeof(uint32_t));
                bin_sum = narrow;
                break;
              case VALUE_BYTES:
                memcpy(&wide, entry_val, sizeof(uint64_t));
                bin_sum = wide;
                break;
              case DISTINCT_SIP:
              case DISTINCT_DIP:
                if (distinct_num_fields > 1) {
                    GET_DISTINCT_IP(entry_key, 0, &prev_ip);
                }
                bin_sum = 1;
                break;
              case VALUE_PLUGIN:
                skAbortBadCase(value_fields[0].vf_id);
            }
            continue;
        }

        /* still inside the current bin */
        if (count_only) {
            continue;
        }

        /* fold this entry into the bin's running value */
        switch (value_fields[0].vf_id) {
          case VALUE_PACKETS:
          case VALUE_FLOWS:
            memcpy(&narrow, entry_val, sizeof(uint32_t));
            bin_sum += narrow;
            break;
          case VALUE_BYTES:
            memcpy(&wide, entry_val, sizeof(uint64_t));
            bin_sum += wide;
            break;
          case DISTINCT_SIP:
          case DISTINCT_DIP:
            if (1 == distinct_num_fields) {
                ++bin_sum;
            } else {
                /* count the IP only when it changes from the
                 * previous entry */
                GET_DISTINCT_IP(entry_key, 0, &this_ip);
                if (this_ip != prev_ip) {
                    ++bin_sum;
                    prev_ip = this_ip;
                }
            }
            break;
          case VALUE_PLUGIN:
            skAbortBadCase(value_fields[0].vf_id);
        }
    }

    /* the last bin has not been tested against the limit yet */
    if (!count_only) {
        if ((RWSTATS_DIR_TOP == direction)
            ? (bin_sum >= limit.value[1])
            : (bin_sum <= limit.value[1]))
        {
            ++limit.value[0];
        }
    }
}


/*
 *  cmp = rwstatsCompareCounts{Top,Btm}{32,64}(node1, node2);
 *
 *    The following 4 functions are invoked by the skHeap library to
 *    compare counters.  'node1' and 'node2' are pointers to an
 *    integer value (either a uint32_t or a uint64_t).
 *
 *    For the *Top* functions, return 1, 0, -1 depending on whether
 *    the value in 'node1' is <, ==, > the value in 'node2'.
 *
 *    For the *Btm* functions, return -1, 0, 1 depending on whether
 *    the value in 'node1' is <, ==, > the value in 'node2'.
 */
#if !RWSTATS_USE_MEMCPY
/* Top-N ordering on the first value field, read in place as a
 * uint32_t: -1 when node1 holds the larger value, 1 when it holds the
 * smaller, 0 when equal. */
static int rwstatsCompareCountsTop32(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    const uint32_t lhs = *VAL_FIELD_GET_PTR_U32(HEAP_NODE_VAL_PTR(node1), 0);
    const uint32_t rhs = *VAL_FIELD_GET_PTR_U32(HEAP_NODE_VAL_PTR(node2), 0);

    if (lhs == rhs) {
        return 0;
    }
    return (lhs > rhs) ? -1 : 1;
}

static int rwstatsCompareCountsBtm32(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    /* read the first value field of each node as a 32-bit count */
    const uint32_t lhs = *VAL_FIELD_GET_PTR_U32(HEAP_NODE_VAL_PTR(node1), 0);
    const uint32_t rhs = *VAL_FIELD_GET_PTR_U32(HEAP_NODE_VAL_PTR(node2), 0);

    /* -1 when node1 holds the smaller count, 1 when it holds the
     * larger count, 0 when the counts are equal */
    return (lhs > rhs) - (lhs < rhs);
}

static int rwstatsCompareCountsTop64(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    /* read the first value field of each node as a 64-bit count */
    const uint64_t lhs = *VAL_FIELD_GET_PTR_U64(HEAP_NODE_VAL_PTR(node1), 0);
    const uint64_t rhs = *VAL_FIELD_GET_PTR_U64(HEAP_NODE_VAL_PTR(node2), 0);

    /* -1 when node1 holds the larger count, 1 when it holds the
     * smaller count, 0 when the counts are equal */
    return (lhs < rhs) - (lhs > rhs);
}

static int rwstatsCompareCountsBtm64(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    /* read the first value field of each node as a 64-bit count */
    const uint64_t lhs = *VAL_FIELD_GET_PTR_U64(HEAP_NODE_VAL_PTR(node1), 0);
    const uint64_t rhs = *VAL_FIELD_GET_PTR_U64(HEAP_NODE_VAL_PTR(node2), 0);

    /* -1 when node1 holds the smaller count, 1 when it holds the
     * larger count, 0 when the counts are equal */
    return (lhs > rhs) - (lhs < rhs);
}
#else  /* RWSTATS_USE_MEMCPY */
static int rwstatsCompareCountsTop32(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    uint32_t lhs;
    uint32_t rhs;

    /* fetch the first value field of each node into a local */
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node1), 0, &lhs);
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node2), 0, &rhs);

    /* -1 if node1's count is larger, 1 if it is smaller, else 0 */
    return ((lhs == rhs) ? 0 : ((lhs > rhs) ? -1 : 1));
}

static int rwstatsCompareCountsBtm32(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    uint32_t lhs;
    uint32_t rhs;

    /* fetch the first value field of each node into a local */
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node1), 0, &lhs);
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node2), 0, &rhs);

    /* -1 if node1's count is smaller, 1 if it is larger, else 0 */
    return ((lhs == rhs) ? 0 : ((lhs < rhs) ? -1 : 1));
}

static int rwstatsCompareCountsTop64(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    uint64_t lhs;
    uint64_t rhs;

    /* fetch the first value field of each node into a local */
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node1), 0, &lhs);
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node2), 0, &rhs);

    /* -1 if node1's count is larger, 1 if it is smaller, else 0 */
    return ((lhs == rhs) ? 0 : ((lhs > rhs) ? -1 : 1));
}

static int rwstatsCompareCountsBtm64(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    uint64_t lhs;
    uint64_t rhs;

    /* fetch the first value field of each node into a local */
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node1), 0, &lhs);
    VAL_FIELD_MEMGET(HEAP_NODE_VAL_PTR(node2), 0, &rhs);

    /* -1 if node1's count is smaller, 1 if it is larger, else 0 */
    return ((lhs == rhs) ? 0 : ((lhs < rhs) ? -1 : 1));
}
#endif  /* RWSTATS_USE_MEMCPY */
/*
 *  cmp = rwstatsComparePluginTop(node1, node2);
 *
 *    Heap comparison callback used when the first value field comes
 *    from a plug-in.  The plug-in's binary comparison function is
 *    given the value of 'node2' first and the value of 'node1'
 *    second---the reverse of rwstatsComparePluginBtm()---and its
 *    result is returned.  If the plug-in reports an error, a message
 *    is printed and appExit() is called.
 */
static int rwstatsComparePluginTop(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    const char **field_names;
    skplugin_err_t rv;
    int result;

    rv = skPluginFieldRunBinCompareFn(
        value_fields[0].vf_plugin_field, &result,
        VAL_FIELD_GET_PTR(HEAP_NODE_VAL_PTR(node2),0),
        VAL_FIELD_GET_PTR(HEAP_NODE_VAL_PTR(node1),0));
    if (SKPLUGIN_OK == rv) {
        return result;
    }

    /* comparison failed; name the field in the error and exit */
    skPluginFieldName(value_fields[0].vf_plugin_field, &field_names);
    skAppPrintErr(("Plugin-based field %s failed "
                   "binary comparison with error code %d"),
                  field_names[0], rv);
    appExit(EXIT_FAILURE);
    return result;
}

/*
 *  cmp = rwstatsComparePluginBtm(node1, node2);
 *
 *    Heap comparison callback used when the first value field comes
 *    from a plug-in.  The plug-in's binary comparison function is
 *    given the value of 'node1' first and the value of 'node2'
 *    second, and its result is returned.  If the plug-in reports an
 *    error, a message is printed and appExit() is called.
 */
static int rwstatsComparePluginBtm(
    const skheapnode_t  node1,
    const skheapnode_t  node2)
{
    const char **field_names;
    skplugin_err_t rv;
    int result;

    rv = skPluginFieldRunBinCompareFn(
        value_fields[0].vf_plugin_field, &result,
        VAL_FIELD_GET_PTR(HEAP_NODE_VAL_PTR(node1),0),
        VAL_FIELD_GET_PTR(HEAP_NODE_VAL_PTR(node2),0));
    if (SKPLUGIN_OK == rv) {
        return result;
    }

    /* comparison failed; name the field in the error and exit */
    skPluginFieldName(value_fields[0].vf_plugin_field, &field_names);
    skAppPrintErr(("Plugin-based field %s failed "
                   "binary comparison with error code %d"),
                  field_names[0], rv);
    appExit(EXIT_FAILURE);
    return result;
}


/*
 *  printTableSimple(hash_table);
 *
 *    Print the key/value pairs from the hash table 'ht'.  There are
 *    no distinct values to keep track of.
 *
 *    The heap comparison function is selected from the type of
 *    value_fields[0] and the global 'direction'.  A heap sized by
 *    limit.value[RWSTATS_COUNT] is filled from the hash table; once
 *    it is full, each remaining entry replaces the heap's top node
 *    only when the comparison function ranks it ahead of that node.
 *    The heap is then handed to rwstatsPrintHeap().
 *
 *    When rwstatsComputeLimits() reports no entries or a zero count,
 *    only the header is printed.  Failure to create the heap exits
 *    the application.
 */
static void printTableSimple(HashTable *ht)
{
    skheapcmpfn_t cmp_fn = NULL;
    skheap_t *heap = NULL;
    uint8_t *heap_ptr;
    uint8_t *newnode;
    uint8_t *newnode_key;
    uint8_t *newnode_val;
    uint32_t heap_num_entries;

    HASH_ITER ithash;
    uint8_t *hash_key;
    uint8_t *hash_val = NULL;

    /* use global scratch space */
    newnode = scratch;
    newnode_key = HEAP_NODE_KEY_PTR(newnode);
    newnode_val = HEAP_NODE_VAL_PTR(newnode);

    /* set comparison function */
    switch (value_fields[0].vf_id) {
      case DISTINCT_SIP:
      case DISTINCT_DIP:
        /* distinct fields are never handled by this function */
        skAbortBadCase(value_fields[0].vf_id);

      case VALUE_PLUGIN:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsComparePluginTop;
        } else {
            cmp_fn = &rwstatsComparePluginBtm;
        }
        break;
      case VALUE_BYTES:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop64;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm64;
        }
        break;
      case VALUE_PACKETS:
      case VALUE_FLOWS:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop32;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm32;
        }
        break;
    }

    /* convert a percentage or threshold to a count */
    rwstatsComputeLimits(ht);

    /* did we read data?  is the threshold too high or low? */
    if ((0 == limit.entries) || (0 == limit.value[RWSTATS_COUNT])) {
        topnPrintHeader();
        return;
    }

    /* Create the heap */
    heap = skHeapCreate(cmp_fn, limit.value[RWSTATS_COUNT],
                        heap_node_bytes, NULL);
    if (NULL == heap) {
        skAppPrintErr(("Unable to create heap of %" PRIu64
                       " %" PRIu32 "-byte elements"),
                      limit.value[RWSTATS_COUNT], (uint32_t)heap_node_bytes);
        exit(EXIT_FAILURE);
    }

    /* create the iterator over the hash */
    ithash = hashlib_create_iterator(ht);

    /* put the first topn entries from the hash table into the heap */
    for (heap_num_entries = 0;
         ((heap_num_entries < limit.value[0])
          && (hashlib_iterate(ht, &ithash, &hash_key, &hash_val)
              != ERR_NOMOREENTRIES));
         ++heap_num_entries)
    {
        memcpy(newnode_key, hash_key, key_byte_width);
        memcpy(newnode_val, hash_val, hash_value_bytes);
        skHeapInsert(heap, newnode);
    }

    if (heap_num_entries == 0) {
        /* no data.  done. */
        skHeapFree(heap);
        return;
    }

    /* Get the node at the top of heap and its value.  This is the
     * smallest value in the topN. */
    skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);

    /* Process the remaining hash table entries */
    while (hashlib_iterate(ht, &ithash, &hash_key, &hash_val)
           != ERR_NOMOREENTRIES)
    {
        /* only the value is needed to decide whether this entry
         * belongs in the heap; the key is copied once we know it
         * does */
        memcpy(newnode_val, hash_val, hash_value_bytes);

        /* Compare complete heap nodes.  The comparison functions
         * apply HEAP_NODE_VAL_PTR() to both of their arguments, so
         * they must be given 'newnode' itself, not a pointer that
         * already addresses the node's value. */
        if (cmp_fn(heap_ptr, newnode) <= 0) {
            continue;
        }

        /* The hash-table element we just read is "better" (for topN,
         * higher than current heap-root's value; for bottomN, lower
         * than current heap-root's value). */
        memcpy(newnode_key, hash_key, key_byte_width);
        skHeapReplaceTop(heap, newnode, NULL);

        /* the top may have changed; get the new top */
        skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
    }

    rwstatsPrintHeap(heap);
    skHeapFree(heap);
}


/*
 *  printTableDistinct(hash_table);
 *
 *    Print the entries from the hash table 'ht' when one or more
 *    distinct fields are requested.  Part of each entry's key is the
 *    column we are computing the 'distinct' count of.
 *
 *    The table is sorted and then walked in order.  Consecutive
 *    entries whose first 'key_byte_width' bytes are identical are
 *    merged into a single heap node: their values are combined with
 *    mergeValues() and the distinct counters are updated.  When the
 *    key changes (or the table is exhausted), the distinct counts are
 *    written into the node's value and the node is offered to the
 *    heap.  The heap is finally handed to rwstatsPrintHeap().
 *
 *    Exactly one or two distinct fields are supported; any other
 *    number aborts.  Failure to create the heap or the IPset exits
 *    the application.
 */
static void printTableDistinct(HashTable *ht)
{
    skheapcmpfn_t cmp_fn = NULL;
    skheap_t *heap = NULL;
    uint8_t *heap_ptr;
    uint8_t *newnode;
    uint8_t *newnode_key;
    uint8_t *newnode_val;
    uint32_t heap_num_entries = 0;
    HASH_ITER ithash;
    uint8_t *hash_key;
    uint8_t *hash_val = NULL;
    uint32_t tmp_ip;
    distinct_value_t distincts[NUM_DISTINCTS];
    int final = 0;
    size_t d;

    /* use global scratch space */
    newnode = scratch;
    newnode_key = HEAP_NODE_KEY_PTR(newnode);
    newnode_val = HEAP_NODE_VAL_PTR(newnode);

    /* set comparison function; distinct counts use the 64-bit
     * comparators, the same as the byte count */
    switch (value_fields[0].vf_id) {
      case VALUE_PLUGIN:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsComparePluginTop;
        } else {
            cmp_fn = &rwstatsComparePluginBtm;
        }
        break;
      case VALUE_BYTES:
      case DISTINCT_SIP:
      case DISTINCT_DIP:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop64;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm64;
        }
        break;
      case VALUE_PACKETS:
      case VALUE_FLOWS:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop32;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm32;
        }
        break;
    }

    /*
     * Sort the entries then iterate through the hash table.  When we
     * are computing a single distinct field, we calculate it by
     * counting the number of hash entries that have the same key
     * (minus the distinct column).
     *
     * To handle multiple distinct columns, we count the first
     * distinct value by counting the number of times it changes for
     * the same key.  The other distinct columns are calculated by
     * checking whether we have seen the value before, and if not
     * incrementing a counter.
     */
    hashlib_sort_entries(ht);

    /* convert a percentage or threshold to a count */
    rwstatsComputeLimitsDistinct(ht);

    /* did we read data?  is the threshold too high or low? */
    if ((0 == limit.entries) || (0 == limit.value[RWSTATS_COUNT])) {
        topnPrintHeader();
        return;
    }

    /* create the iterator */
    ithash = hashlib_create_iterator(ht);

    /* get first key/value from hash table */
    if (hashlib_iterate(ht, &ithash, &hash_key, &hash_val)
        == ERR_NOMOREENTRIES)
    {
        /* no data.  done. */
        return;
    }

    /* Create the heap */
    heap = skHeapCreate(cmp_fn, limit.value[RWSTATS_COUNT],
                        heap_node_bytes, NULL);
    if (NULL == heap) {
        skAppPrintErr(("Unable to create heap of %" PRIu64
                       " %" PRIu32 "-byte elements"),
                      limit.value[RWSTATS_COUNT], (uint32_t)heap_node_bytes);
        exit(EXIT_FAILURE);
    }

    /* cache the key and start summing the values */
    memcpy(newnode_key, hash_key, key_byte_width);
    memcpy(newnode_val, hash_val, hash_value_bytes);

    /* set up the handling of distinct field(s) */
    if (1 == distinct_num_fields) {
        /* a single distinct field is counted per hash entry */
        distincts[0].dv_type = RWSTATS_DISTINCT_COUNT;
        distincts[0].dv_count = 1;
    } else if (2 == distinct_num_fields) {
        /* the first field gets cached */
        distincts[0].dv_type = RWSTATS_DISTINCT_CACHE;
        distincts[0].dv_count = 1;
        GET_DISTINCT_IP(hash_key, 0, &distincts[0].dv_v.dv_cache);

        /* create an IPset for keeping track of IPs we see, and add
         * the second IP from the key to it */
        distincts[1].dv_type = RWSTATS_DISTINCT_IPSET;
        if (skIPTreeCreate(&distincts[1].dv_v.dv_ipset)) {
            skAppPrintErr("Unable to create IPset");
            appExit(EXIT_FAILURE);
        }
        distincts[1].dv_count = 0;
        GET_DISTINCT_IP(hash_key, 1, &tmp_ip);
        MEMCHECK_IPSET_ADD(distincts[1].dv_v.dv_ipset, tmp_ip,
                           distincts[1].dv_count);
    } else {
        skAppPrintErr("Unhandled distinct_num_fields %d",
                      (int)distinct_num_fields);
        skAbort();
    }

    /* process the remaining hash entries */
    for (;;) {
        /* read a new key/value pair from the hash.  Once the table is
         * exhausted, 'hash_key' and 'hash_val' still hold the values
         * from the previous pass; 'final' keeps them from being used
         * again. */
        if (hashlib_iterate(ht, &ithash, &hash_key, &hash_val)
            == ERR_NOMOREENTRIES)
        {
            final = 1;
        }

        /* compare keys; only the first key_byte_width bytes are
         * compared, which leaves the distinct column(s) out */
        if (!final
            && (0 == memcmp(newnode_key, hash_key, key_byte_width)))
        {
            /* keys match; add current 'hash_val' to 'newnode_val' */
            mergeValues(newnode_val, hash_val);
            for (d = 0; d < distinct_num_fields; ++d) {
                switch (distincts[d].dv_type) {
                  case RWSTATS_DISTINCT_COUNT:
                    ++distincts[d].dv_count;
                    break;
                  case RWSTATS_DISTINCT_CACHE:
                    /* count only when the IP differs from the one
                     * seen on the previous entry */
                    GET_DISTINCT_IP(hash_key, d, &tmp_ip);
                    if (distincts[d].dv_v.dv_cache != tmp_ip) {
                        ++distincts[d].dv_count;
                        distincts[d].dv_v.dv_cache = tmp_ip;
                    }
                    break;
                  case RWSTATS_DISTINCT_IPSET:
                    GET_DISTINCT_IP(hash_key, d, &tmp_ip);
                    MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset, tmp_ip,
                                       distincts[d].dv_count);
                    break;
                }
            }
        } else {
            /* keys differ; store the distinct value into the merged value */
            for (d = 0; d < distinct_num_fields; ++d) {
                VAL_FIELD_MEMSET(newnode_val,
                                 distinct_fields[d].df_val_field_pos,
                                 &distincts[d].dv_count);
            }

            /* maybe insert the key/value into heap.  'heap_ptr' is
             * first assigned when the heap becomes full, and the
             * cmp_fn() branch below is only reached after that. */
            if (heap_num_entries < limit.value[0]) {
                /* there is still room in the heap */
                skHeapInsert(heap, newnode);
                ++heap_num_entries;
                if (heap_num_entries == limit.value[0]) {
                    /* Get the node at the top of heap and its value.
                     * This is the smallest value in the topN. */
                    skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                }
            } else if (cmp_fn(heap_ptr, newnode) > 0) {
                skHeapReplaceTop(heap, newnode, NULL);
                /* the top may have changed; get the new top */
                skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
            }
            if (final) {
                break;
            }

            /* remember the new key and reset values and distincts */
            memcpy(newnode_key, hash_key, key_byte_width);
            /* NOTE(review): this copy uses 'value_byte_width' while
             * the initial copy before the loop uses
             * 'hash_value_bytes'; confirm the two sizes are meant to
             * differ here. */
            memcpy(newnode_val, hash_val, value_byte_width);
            for (d = 0; d < distinct_num_fields; ++d) {
                switch (distincts[d].dv_type) {
                  case RWSTATS_DISTINCT_COUNT:
                    distincts[d].dv_count = 1;
                    break;
                  case RWSTATS_DISTINCT_CACHE:
                    distincts[d].dv_count = 1;
                    GET_DISTINCT_IP(hash_key, d, &distincts[d].dv_v.dv_cache);
                    break;
                  case RWSTATS_DISTINCT_IPSET:
                    distincts[d].dv_count = 0;
                    skIPTreeRemoveAll(distincts[d].dv_v.dv_ipset);
                    GET_DISTINCT_IP(hash_key, d, &tmp_ip);
                    MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset, tmp_ip,
                                       distincts[d].dv_count);
                    break;
                }
            }
        }
    }

    /* the IPset(s) are no longer needed */
    for (d = 0; d < distinct_num_fields; ++d) {
        if (RWSTATS_DISTINCT_IPSET == distincts[d].dv_type) {
            skIPTreeDelete(&distincts[d].dv_v.dv_ipset);
        }
    }

    rwstatsPrintHeap(heap);
    skHeapFree(heap);
}


/*
 *  count = hashOpenAllTempFiles(fps[], temp_file_idx);
 *
 *    Open all temporary files, where the index of the last temporary
 *    file is 'temp_file_idx', put the file handles in the array 'fps',
 *    and return the number of files opened.
 *
 *    If it is impossible to open all files due to a lack of file
 *    handles, the existing temporary files will be merged into new
 *    temporary files, and then another attempt will be made to open
 *    all files.  At most MAX_MERGE_FILES files are opened on each
 *    pass.  Files that have been merged are closed and removed.
 *
 *    This function will only return when it is possible to return a
 *    file handle to every existing temporary file.  If it is unable
 *    to create a new temporary file, it causes the application to
 *    exit.  It also exits when a temporary file cannot be opened for
 *    any reason other than EMFILE or ENOMEM, or when not even one
 *    file can be opened.
 */
static int hashOpenAllTempFiles(
    FILE   *fps[],
    int     temp_file_idx)
{
    uint8_t node[MAX_MERGE_FILES][MAX_HASH_NODE_BYTES];
    uint16_t reading[MAX_MERGE_FILES];
    uint16_t open_count;
    uint16_t read_count;
    uint16_t lowest;
    uint16_t read_idx;
    uint16_t i;
    int j;
    int tmp_idx_a;
    int tmp_idx_b;
    FILE *fp_intermediate = NULL;
    int idx_intermediate;

    memset(reading, 0, sizeof(reading));

    /* index at which to start the merge */
    tmp_idx_a = 0;

    /* This loop repeats as long as we haven't opened all of the temp
     * files generated while reading the flows. */
    for (;;) {
        /* index at which to stop the merge */
        tmp_idx_b = temp_file_idx;

        TRACEMSG(("Attempting to open %d temp files #%d through #%d",
                  (1 + tmp_idx_b - tmp_idx_a), tmp_idx_a, tmp_idx_b));

        /* open an intermediate temp file.  Data will be merged into
         * this new temp file if there are not enough file handles
         * available to open all the temporary files we wrote while
         * reading the data. */
        fp_intermediate = skTempFileCreate(&idx_intermediate, NULL);
        if (fp_intermediate == NULL) {
            skAppPrintSyserror("Unable to create temporary file");
            appExit(EXIT_FAILURE);
        }

        open_count = 0;

        /* Attempt to open all temp files, but stop after
         * MAX_MERGE_FILES files, or if we fail due to lack of
         * resources (EMFILE or ENOMEM) */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            fps[open_count] = skTempFileOpen(j);
            if (fps[open_count] == NULL) {
                if ((open_count > 0)
                    && ((errno == EMFILE) || (errno == ENOMEM)))
                {
                    /* Blast!  We can't open any more temp files.  So,
                     * we rewind by one to catch this one the next
                     * time around. */
                    tmp_idx_b = j - 1;
                    TRACEMSG((("EMFILE limit hit--"
                               "merging #%d through #%d to #%d"),
                              tmp_idx_a, tmp_idx_b, idx_intermediate));
                    break;
                } else {
                    skAppPrintSyserror("Unable to open temporary file '%s'",
                                       skTempFileGetName(j));
                    appExit(EXIT_FAILURE);
                }
            }

            ++open_count;
            if (open_count == MAX_MERGE_FILES) {
                /* We've reached the limit for this pass.  Set
                 * tmp_idx_b to the file we just opened. */
                tmp_idx_b = j;
                TRACEMSG((("MAX_MERGE_FILES limit hit--"
                           "merging #%d through #%d to #%d"),
                          tmp_idx_a, tmp_idx_b, idx_intermediate));
                break;
            }
        }

        TRACEMSG(("Opened %d temp files", open_count));

        /* Check to see if we've opened all temp files.  If so, close
         * the intermediate file and return.  The intermediate file is
         * only closed on this path; it is not removed here. */
        if (tmp_idx_b == temp_file_idx) {
            TRACEMSG(("Successfully opened all temp files"));
            if (EOF == fclose(fp_intermediate)) {
                skAppPrintSyserror("Error closing temporary file '%s'",
                                   skTempFileGetName(idx_intermediate));
                appExit(EXIT_FAILURE);
            }
            return open_count;
        }
        /* Else, we could not open all temp files, so merge all opened
         * temp files into the intermediate file, and add the
         * intermediate file to the list of temp files to be merged */
        temp_file_idx = idx_intermediate;

        /* count number of files with data to read */
        read_count = 0;

        /* Read the first key/value pair from each temp file into the
         * work buffers.  A file that yields no record is skipped; an
         * fread() result of zero is treated as "no data" without
         * checking whether it was end-of-file or an error. */
        for (i = 0; i < open_count; ++i) {
            if (!fread(node[i], hash_node_bytes, 1, fps[i])) {
                TRACEMSG(("Could not get first record from file '%s'",
                          skTempFileGetName(tmp_idx_a + i)));
                continue;
            }
            /* 'reading' holds the indexes of the files from which we
             * are reading records */
            reading[read_count] = i;
            ++read_count;
        }

        TRACEMSG((("Merging temporary files... open_count: %" PRIu16
                   "; read_count: %" PRIu16),
                  open_count, read_count));

        /* exit this while() once we are only processing a single
         * file */
        while (read_count > 1) {
            /* set "lowest" to first file with data. use 'read_idx' to
             * remember this position in the 'reading' array */
            read_idx = 0;
            lowest = reading[read_idx];

            /* compare "lowest" with the current node in every other
             * file to find the actual lowest */
            for (i = 1; i < read_count; ++i) {
                /* If the temp file's key is lower than the lowest... */
                if (memcmp(node[reading[i]], node[lowest], hash_key_bytes) < 0)
                {
                    /* record the index of this temp file as having
                     * the lowest key */
                    read_idx = i;
                    lowest = reading[read_idx];
                }
            }

            /* write the lowest key/value pair to the intermediate
             * temp file */
            if (!fwrite(node[lowest], hash_node_bytes, 1, fp_intermediate))
            {
                skAppPrintSyserror("Could not write to temporary file '%s'",
                                   skTempFileGetName(idx_intermediate));
                appExit(EXIT_FAILURE);
            }

            /* replace the node we just wrote */
            if (!fread(node[lowest], hash_node_bytes, 1, fps[lowest])) {
                /* no more data for this file; replace file with last
                 * file in 'reading' array */
                --read_count;
                reading[read_idx] = reading[read_count];
            }
        }

        /* read records from the single remaining file; its current
         * record is already in 'node', so write before reading */
        if (read_count) {
            assert(1 == read_count);
            lowest = reading[0];
            do {
                if (!fwrite(node[lowest], hash_node_bytes, 1, fp_intermediate))
                {
                    skAppPrintSyserror("Could not write to temporary file '%s'",
                                       skTempFileGetName(idx_intermediate));
                    appExit(EXIT_FAILURE);
                }
            } while (fread(node[lowest], hash_node_bytes, 1, fps[lowest]));
        }

        TRACEMSG((("Finished processing #%d through #%d"),
                  tmp_idx_a, tmp_idx_b));

        /* Close all of the temp files that we processed this time. */
        for (i = 0; i < open_count; ++i) {
            fclose(fps[i]);
        }
        /* Delete all the temp files that we opened */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            skTempFileRemove(j);
        }

        /* Close the intermediate temp file. */
        if (fp_intermediate) {
            TRACEMSG(("Finished writing #%d, '%s'",
                      idx_intermediate, skTempFileGetName(idx_intermediate)));
            if (EOF == fclose(fp_intermediate)) {
                skAppPrintSyserror("Error closing temporary file '%s'",
                                   skTempFileGetName(idx_intermediate));
                appExit(EXIT_FAILURE);
            }
            fp_intermediate = NULL;
        }

        /* Start the next merge with the next temp file */
        tmp_idx_a = tmp_idx_b + 1;
    }

    return -1;    /* NOT REACHED */
}


/*
 *  hashMergeFiles(temp_file_idx)
 *
 *    Merge the temporary files numbered from 0 to 'temp_file_idx'
 *    inclusive and print the result.  Exits the application if an
 *    error occurs.
 *
 *    The files are opened with hashOpenAllTempFiles() and read in
 *    key order.  Consecutive records whose first 'key_byte_width'
 *    bytes match are combined: their values are merged with
 *    mergeValues() and the distinct counters are updated.  Each
 *    combined record increments 'limit.entries' and is offered to a
 *    heap sized by limit.value[RWSTATS_COUNT]; the heap is handed to
 *    rwstatsPrintHeap() once every file has been consumed.
 *
 *    The temp-file handles are closed as soon as the merge finishes,
 *    before the output is generated.
 */
static void hashMergeFiles(int temp_file_idx)
{
    skheapcmpfn_t cmp_fn = NULL;
    skheap_t *heap = NULL;
    uint8_t *heap_ptr;
    uint8_t newnode[MAX_HASH_NODE_BYTES];
    uint8_t *newnode_key;
    uint8_t *newnode_val;
    uint32_t heap_num_entries = 0;
    int final = 0;

    /* 'cached_node' is the record being accumulated; 'merged_values'
     * addresses the values that follow its hash key */
    uint8_t *cached_node = scratch;
    uint8_t *merged_values = scratch + hash_key_bytes;
    FILE *fps[MAX_MERGE_FILES];
    uint8_t node[MAX_MERGE_FILES][MAX_HASH_NODE_BYTES];
    uint16_t reading[MAX_MERGE_FILES];
    uint16_t open_count;
    uint16_t read_count = 0;
    uint16_t lowest = 0;
    uint16_t read_idx = 0;
    uint16_t i;
    distinct_value_t distincts[NUM_DISTINCTS];
    uint32_t tmp_ip;
    size_t d;

    /* set up pointers into the heap node */
    newnode_key = HEAP_NODE_KEY_PTR(newnode);
    newnode_val = HEAP_NODE_VAL_PTR(newnode);

    memset(reading, 0, sizeof(reading));
    memset(distincts, 0, sizeof(distincts));

    /* set comparison function */
    switch (value_fields[0].vf_id) {
      case VALUE_PLUGIN:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsComparePluginTop;
        } else {
            cmp_fn = &rwstatsComparePluginBtm;
        }
        break;
      case VALUE_BYTES:
      case DISTINCT_SIP:
      case DISTINCT_DIP:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop64;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm64;
        }
        break;
      case VALUE_PACKETS:
      case VALUE_FLOWS:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop32;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm32;
        }
        break;
    }

    /* create the heap */
    heap = skHeapCreate(cmp_fn, limit.value[RWSTATS_COUNT],
                        heap_node_bytes, NULL);
    if (NULL == heap) {
        skAppPrintErr(("Unable to create heap of %" PRIu64
                       " %" PRIu32 "-byte elements"),
                      limit.value[RWSTATS_COUNT], (uint32_t)heap_node_bytes);
        exit(EXIT_FAILURE);
    }

    /* open all temp files.  note that this only returns once all
     * temporary files are open. */
    open_count = hashOpenAllTempFiles(fps, temp_file_idx);

    /* read the first key/value pair from each temp file into the work
     * buffers and find the one with the lowest key.  Use the entire
     * hash_key---i.e., the user's key and any distinct columns. */
    for (i = 0; i < open_count; ++i) {
        if (!fread(node[i], hash_node_bytes, 1, fps[i])) {
            TRACEMSG(("Cannot read first record from file"));
            continue;
        }

        if ((0 == read_count)
            || (memcmp(node[i], node[lowest], hash_key_bytes) < 0))
        {
            /* either this is the first node we've seen, or the node
             * from 'i' is lower than current lowest; record the index
             * of this temp file as being lowest */
            lowest = i;
            read_idx = read_count;
        }

        reading[read_count] = i;
        ++read_count;
    }

    if (0 == read_count) {
        skAppPrintErr("Could not read records from any temporary files");
        appExit(EXIT_FAILURE);
    }

    /* cache the lowest */
    memcpy(cached_node, node[lowest], hash_node_bytes);

    /* initialize everything required to keep track of distinct
     * counts.  With two distinct fields, the second uses an IPset and
     * the first is cached; with one, it is cached. */
    switch (distinct_num_fields) {
      case 0:
        break;

      case 2:
        /* create an IPset for keeping track of IPs we see, and add
         * the second IP from the key to it */
        distincts[1].dv_type = RWSTATS_DISTINCT_IPSET;
        if (skIPTreeCreate(&distincts[1].dv_v.dv_ipset)) {
            skAppPrintErr("Unable to create IPset");
            appExit(EXIT_FAILURE);
        }
        distincts[1].dv_count = 0;
        GET_DISTINCT_IP(cached_node, 1, &tmp_ip);
        MEMCHECK_IPSET_ADD(distincts[1].dv_v.dv_ipset, tmp_ip,
                           distincts[1].dv_count);
        /* FALLTHROUGH */

      case 1:
        /* the first field gets cached */
        distincts[0].dv_type = RWSTATS_DISTINCT_CACHE;
        distincts[0].dv_count = 1;
        GET_DISTINCT_IP(cached_node, 0, &distincts[0].dv_v.dv_cache);
        break;

      default:
        skAbortBadCase(distinct_num_fields);
    }

    /* process all remaining key/value pairs in all opened files */
    for (;;) {
        /* replace the key/value pair we just processed */
        if (!fread(node[lowest], hash_node_bytes, 1, fps[lowest])) {
            /* finished with this file; replace with last file in array */
            --read_count;
            reading[read_idx] = reading[read_count];
            if (0 == read_count) {
                /* no more data; exit the for() loop after we add the
                 * final cached_node to the heap */
                final = 1;
            }
        }

        /* set 'lowest' to the first file with data */
        read_idx = 0;
        lowest = reading[read_idx];

        /* compare "lowest" with every other file to find the
         * actual lowest */
        for (i = 1; i < read_count; ++i) {
            /* If the temp file's key is lower than the current
             * lowest... */
            if (memcmp(node[reading[i]], node[lowest], hash_key_bytes) < 0) {
                /* ...record the index of this temp file as having
                 * the lowest key */
                read_idx = i;
                lowest = reading[read_idx];
            }
        }

        /* found the lowest key; compare this key with the cached_node;
         * using only the user's key (ie, ignoring distincts) */
        if (!final
            && 0 == memcmp(cached_node, node[lowest], key_byte_width))
        {
            /* keys are same; add the current values */
            mergeValues(merged_values, node[lowest] + hash_key_bytes);

            /* handle the distinct fields */
            for (d = 0; d < distinct_num_fields; ++d) {
                switch (distincts[d].dv_type) {
                  case RWSTATS_DISTINCT_COUNT:
                    skAbortBadCase(distincts[d].dv_type);
                  case RWSTATS_DISTINCT_CACHE:
                    GET_DISTINCT_IP(node[lowest], d, &tmp_ip);
                    if (distincts[d].dv_v.dv_cache != tmp_ip) {
                        ++distincts[d].dv_count;
                        distincts[d].dv_v.dv_cache = tmp_ip;
                    }
                    break;
                  case RWSTATS_DISTINCT_IPSET:
                    GET_DISTINCT_IP(node[lowest], d, &tmp_ip);
                    MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset, tmp_ip,
                                       distincts[d].dv_count);
                    break;
                }
            }

        } else {
            /* keys differ (or this is final value); prepare a
             * heapnode from the cached_node and merged_values */
            memcpy(newnode_key, cached_node, key_byte_width);
            memcpy(newnode_val, merged_values, value_byte_width);
            for (d = 0; d < distinct_num_fields; ++d) {
                VAL_FIELD_MEMSET(newnode_val,
                                 distinct_fields[d].df_val_field_pos,
                                 &distincts[d].dv_count);
            }

            ++limit.entries;
            /* maybe insert the key/value into heap.  'heap_ptr' is
             * first assigned when the heap becomes full, and the
             * cmp_fn() branch below is only reached after that. */
            if (heap_num_entries < limit.value[0]) {
                /* there is still room in the heap */
                skHeapInsert(heap, newnode);
                ++heap_num_entries;
                if (heap_num_entries == limit.value[0]) {
                    /* Get the node at the top of heap and its value.
                     * This is the smallest value in the topN. */
                    skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                }
            } else if (cmp_fn(heap_ptr, newnode) > 0) {
                skHeapReplaceTop(heap, newnode, NULL);
                /* the top may have changed; get the new top */
                skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
            }

            /* exit the loop if there is no more data */
            if (final) {
                break;
            }

            /* cache the node (key and values) and reset distincts */
            memcpy(cached_node, node[lowest], hash_node_bytes);
            for (d = 0; d < distinct_num_fields; ++d) {
                switch (distincts[d].dv_type) {
                  case RWSTATS_DISTINCT_COUNT:
                    skAbortBadCase(distincts[d].dv_type);
                  case RWSTATS_DISTINCT_CACHE:
                    distincts[d].dv_count = 1;
                    GET_DISTINCT_IP(cached_node, d,
                                    &distincts[d].dv_v.dv_cache);
                    break;
                  case RWSTATS_DISTINCT_IPSET:
                    distincts[d].dv_count = 0;
                    skIPTreeRemoveAll(distincts[d].dv_v.dv_ipset);
                    GET_DISTINCT_IP(cached_node, d, &tmp_ip);
                    MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset, tmp_ip,
                                       distincts[d].dv_count);
                    break;
                }
            }
        }
    }

    /* every temp file has been read to its end; release the handles
     * returned by hashOpenAllTempFiles() before generating output so
     * they are not held open for the rest of the run.  These files
     * were only read, so the fclose() result is not checked. */
    for (i = 0; i < open_count; ++i) {
        fclose(fps[i]);
    }

    /* don't need the IPsets any longer */
    for (d = 0; d < distinct_num_fields; ++d) {
        if (RWSTATS_DISTINCT_IPSET == distincts[d].dv_type) {
            skIPTreeDelete(&distincts[d].dv_v.dv_ipset);
        }
    }

    /* generate the output */
    rwstatsPrintHeap(heap);

    skHeapFree(heap);
}


/*
 *  status = hashWriteToTemp(&tmp_idx, ht);
 *
 *    Sort the entries of the hash table 'ht' and write each one (the
 *    hash key followed by the hash value) to a newly created
 *    temporary file.  The index of that file is stored in the
 *    location referenced by 'tmp_idx'.
 *
 *    Return 0 on success.  Return -1 if the temporary file cannot be
 *    created, if a write fails, or if closing the file fails.
 */
static int hashWriteToTemp(
    int        *tmp_idx,
    HashTable  *ht)
{
    HASH_ITER iter;
    uint8_t *key_ptr;
    uint8_t *val_ptr;
    FILE *fp;
    int status = 0;

    /* entries must be in sorted order before they are written */
    hashlib_sort_entries(ht);
    iter = hashlib_create_iterator(ht);

    fp = skTempFileCreate(tmp_idx, NULL);
    if (NULL == fp) {
        return -1;
    }

    TRACEMSG(("Writing %u key/value pairs to '%s'...",
              hashlib_count_entries(ht), skTempFileGetName(*tmp_idx)));

    /* write key then value for every entry; stop at the first
     * failed write */
    while (ERR_NOMOREENTRIES
           != hashlib_iterate(ht, &iter, &key_ptr, &val_ptr))
    {
        if (fwrite(key_ptr, hash_key_bytes, 1, fp)
            && fwrite(val_ptr, hash_value_bytes, 1, fp))
        {
            continue;
        }
        /* error writing, errno may or may not be set */
        skAppPrintSyserror("Error writing records to temp file %s",
                           skTempFileGetName(*tmp_idx));
        TRACEMSG(("ERROR!"));
        status = -1;
        break;
    }

    if (0 == status) {
        TRACEMSG(("done"));
    }

    /* always close the file; a close failure is reported only when
     * nothing has gone wrong before it */
    if ((EOF == fclose(fp)) && (0 == status)) {
        skAppPrintSyserror("Error closing temp file %s",
                           skTempFileGetName(*tmp_idx));
        TRACEMSG(("Error on close"));
        status = -1;
    }

    return status;
}


/*
 *  statsHash();
 *
 *    Main control function that creates a hash table, processes the
 *    input (files or stdin), and prints the results.
 *
 *    Each record's key is inserted into the hash table and the
 *    record is added to the values stored for that key.  When an
 *    insert reports that the table is out of memory, the table is
 *    written to a temporary file by hashWriteToTemp(), destroyed,
 *    re-created, and the pending key is inserted into the new table.
 *
 *    Once the input is exhausted: if any temporary file was written,
 *    the final table is written to a temporary file as well and the
 *    files are handed to hashMergeFiles(); otherwise the table is
 *    printed directly.
 *
 *    Every failure in this function is reported and followed by a
 *    call to appExit(EXIT_FAILURE).
 */
static void statsHash(void)
{
    uint8_t no_val_ptr[MAX_VALUE_BYTE_WIDTH];
    HashTable *ht;
    uint8_t *hash_key = scratch; /* use global scratch space */
    uint8_t *hash_val;
    int temp_file_idx = -1;
    int rv = 0;
    rwRec rwrec;

    /* Initialize the value that the hash table uses to indicate a key
     * without a value */
    assert(sizeof(no_val_ptr) >= hash_value_bytes);
    memset(no_val_ptr, 0xFF, sizeof(no_val_ptr));

    /* Create the hash table */
    ht = hashlib_create_table(hash_key_bytes, hash_value_bytes,
                              HTT_INPLACE,
                              no_val_ptr,
                              NULL,
                              0,
                              HASH_INITIAL_SIZE,
                              DEFAULT_LOAD_FACTOR);
    if (NULL == ht) {
        skAppPrintErr("Unable to create hash table");
        appExit(EXIT_FAILURE);
    }

    /* Fill in the record and specified key from the input stream */
    while (fillRecordAndKeyForHash(&rwrec, hash_key)) {

        /* the 'insert' will set 'hash_val' to the memory to use to
         * store the values. either fresh memory or the existing
         * value(s). */
        rv = hashlib_insert(ht, hash_key, &hash_val);
        switch (rv) {
          case OK:
            /* new bin; initialize the memory */
            memcpy(hash_val, value_initial_vals, hash_value_bytes);
            /* FALLTHROUGH */

          case OK_DUPLICATE:
            /* existing value; add new value */
            addRecToValues(&rwrec, hash_val);
            break;

          case ERR_OUTOFMEMORY:
          case ERR_NOMOREBLOCKS:
            /* out of memory; spill the table to a temporary file */
            if (hashWriteToTemp(&temp_file_idx, ht)) {
                skAppPrintErr("Unable to write hash to temporary file");
                appExit(EXIT_FAILURE);
            }
            /* destroy and re-create the hash table */
            hashlib_free_table(ht);
            ht = hashlib_create_table(hash_key_bytes, hash_value_bytes,
                                      HTT_INPLACE,
                                      no_val_ptr,
                                      NULL,
                                      0,
                                      HASH_INITIAL_SIZE,
                                      DEFAULT_LOAD_FACTOR);
            if (NULL == ht) {
                /* This is fatal: without a table the remaining input
                 * cannot be processed and the data already spilled to
                 * temporary files would never be merged or printed.
                 * Exit with a failure status, as is done for every
                 * other error in this function, instead of returning
                 * quietly to the caller. */
                skAppPrintErr("Unable to recreate hash table");
                appExit(EXIT_FAILURE);
            }
            /* add this entry to the newly created hash table */
            rv = hashlib_insert(ht, hash_key, &hash_val);
            if (rv == OK) {
                memcpy(hash_val, value_initial_vals, hash_value_bytes);
                addRecToValues(&rwrec, hash_val);
            } else {
                skAppPrintErr(("Unexpected return code '%d'"
                               " from hash table insert on new hash table"),
                              rv);
                appExit(EXIT_FAILURE);
            }
            break;

          default:
            skAppPrintErr(("Unexpected return code '%d'"
                           " from hash table insert"),
                          rv);
            appExit(EXIT_FAILURE);
        }
    }

    if (-1 != temp_file_idx) {
        /* at least one temporary file exists, so the current/final
         * hash entries must be dumped to a file too */
        if (hashWriteToTemp(&temp_file_idx, ht)) {
            skAppPrintErr("Unable to write hash to temporary file");
            appExit(EXIT_FAILURE);
        }
        /* destroy the hash table; we're done with it.  Clearing the
         * pointer prevents a second free below. */
        hashlib_free_table(ht);
        ht = NULL;

        hashMergeFiles(temp_file_idx);
    } else if (distinct_num_fields == 0) {
        printTableSimple(ht);
    } else {
        printTableDistinct(ht);
    }

    /* release the table if it was not already destroyed above */
    if (ht) {
        hashlib_free_table(ht);
    }
}


/*
 *  status = presortedNodeToTemp(fp, key, value, distincts);
 *
 *    Append one bin to the temporary file 'fp'.  The bin is written
 *    as the 'key' bytes, then the 'value' bytes, and then, for each
 *    distinct field, the field's 64-bit count followed by every IP
 *    address held in that field's IPset.
 *
 *    After a distinct field has been written, its count is set to
 *    zero and its IPset is emptied.
 *
 *    Return 0 on success.  Return -1 as soon as any write fails; in
 *    that case the remaining distinct fields are left untouched.
 */
static int presortedNodeToTemp(
    FILE               *fp,
    const uint8_t      *key,
    const uint8_t      *value,
    distinct_value_t   *distincts)
{
    skIPTreeIterator_t ip_iter;
    distinct_value_t *dv;
    uint32_t addr;
    size_t idx;

    /* the key comes first, followed by the value */
    if (fwrite(key, key_byte_width, 1, fp) != 1) {
        return -1;
    }
    if (fwrite(value, value_byte_width, 1, fp) != 1) {
        return -1;
    }

    for (idx = 0; idx < distinct_num_fields; ++idx) {
        dv = &distincts[idx];

        /* number of IPs for this field */
        if (fwrite(&dv->dv_count, sizeof(uint64_t), 1, fp) != 1) {
            return -1;
        }

        /* the IPs themselves, in the order the iterator returns them */
        skIPTreeIteratorBind(&ip_iter, dv->dv_v.dv_ipset);
        while (skIPTreeIteratorNext(&addr, &ip_iter) == SK_ITERATOR_OK) {
            if (fwrite(&addr, sizeof(uint32_t), 1, fp) != 1) {
                return -1;
            }
        }

        /* this field is now on disk; reset it */
        dv->dv_count = 0;
        skIPTreeRemoveAll(dv->dv_v.dv_ipset);
    }

    return 0;
}


/*
 *  statsPresorted();
 *
 *    Main control function that reads presorted flow records from
 *    files or stdin and prints the results.
 */
static void statsPresorted(void)
{
#define FATAL_RW(frw_action, frw_idx)                                   \
    {                                                                   \
        skAppPrintSyserror(("Unable to %s temporary file '%s'"          \
                            " at %s:%d"),                               \
                           frw_action, skTempFileGetName(frw_idx),      \
                           __FILE__, __LINE__);                         \
        appExit(EXIT_FAILURE);                                          \
    }
#define FATAL_READ(fr_idx) FATAL_RW("read from", (fr_idx))
#define FATAL_WRITE(fw_idx) FATAL_RW("write to", (fw_idx))
#define FATAL_SEEK(fs_idx) FATAL_RW("seek within", (fs_idx))

    skheapcmpfn_t cmp_fn = NULL;
    skheap_t *heap = NULL;
    uint8_t *heap_ptr;
    uint8_t newnode[MAX_HASH_NODE_BYTES];
    uint8_t *newnode_key;
    uint8_t *newnode_val;
    uint32_t heap_num_entries = 0;

    uint8_t buf[4096];
    union fps_un {
        skstream_t *io;
        FILE       *f;
    } fps[MAX_MERGE_FILES];
    rwRec rwrec[MAX_MERGE_FILES];
    uint8_t node[MAX_MERGE_FILES][MAX_KEY_BYTE_WIDTH];
    uint8_t *cached_node;
    uint8_t *merged_values;
    distinct_value_t distincts[NUM_DISTINCTS];
    uint16_t same_key[MAX_MERGE_FILES];
    uint16_t reading[MAX_MERGE_FILES];
    uint16_t read_count;
    uint16_t read_idx;
    uint16_t open_count;
    uint16_t lowest;
    uint16_t i;
    uint16_t same_key_count;
    int step;
    FILE *tmp_fp = NULL;
    int no_more_inputs = 0;
    int j;
    int tmp_idx_a;
    int tmp_idx_b;
    int temp_file_idx = -1;
    int idx_intermediate;
    uint64_t ip_count;
    uint64_t k;
    size_t sz;
    uint32_t ipv4;
    int cmp;
    size_t d;
    int rv = 0;

    /* set up pointers into the heap node */
    newnode_key = HEAP_NODE_KEY_PTR(newnode);
    newnode_val = HEAP_NODE_VAL_PTR(newnode);

    /* use global scratch space */
    cached_node = scratch;
    merged_values = scratch + hash_key_bytes;

    memset(node, 0, sizeof(node));
    memset(reading, 0, sizeof(reading));
    memset(distincts, 0, sizeof(distincts));

    /* set comparison function */
    switch (value_fields[0].vf_id) {
      case VALUE_PLUGIN:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsComparePluginTop;
        } else {
            cmp_fn = &rwstatsComparePluginBtm;
        }
        break;
      case VALUE_BYTES:
      case DISTINCT_SIP:
      case DISTINCT_DIP:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop64;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm64;
        }
        break;
      case VALUE_PACKETS:
      case VALUE_FLOWS:
        if (direction == RWSTATS_DIR_TOP) {
            cmp_fn = &rwstatsCompareCountsTop32;
        } else {
            cmp_fn = &rwstatsCompareCountsBtm32;
        }
        break;
    }

    /* If DISTINCT_SIP and/or DISTINCT_DIP were specified, create
     * IPsets for them. */
    for (d = 0; d < distinct_num_fields; ++d) {
        distincts[d].dv_ip = distinct_fields[d].df_id;
        distincts[d].dv_type = RWSTATS_DISTINCT_IPSET;
        if (skIPTreeCreate(&distincts[d].dv_v.dv_ipset)) {
            skAppPrintErr("Unable to create IPset");
            appExit(EXIT_FAILURE);
        }
    }

    /* This outer loop is over the SiLK Flow input files and it
     * repeats as long as we haven't read all the input files */
    do {
        /* open an intermediate temp file.  The function will write
         * records here if there are not enough file handles available
         * to open all the input files. */
        tmp_fp = skTempFileCreate(&idx_intermediate, NULL);
        if (NULL == tmp_fp) {
            skAppPrintSyserror("Unable to create temporary file");
            appExit(EXIT_FAILURE);
        }

        /* Attempt to open up to MAX_MERGE_FILES, though an open
         * may fail due to lack of resources (EMFILE or ENOMEM) */
        for (open_count = 0; open_count < MAX_MERGE_FILES; ++open_count) {
            rv = appNextInput(&(fps[open_count].io));
            if (rv != 0) {
                break;
            }
        }
        switch (rv) {
          case 1:
            /* successfully opened all (remaining) input files */
            TRACEMSG(("Opened all (remaining) inputs"));
            no_more_inputs = 1;
            if (-1 != temp_file_idx) {
                /* must write to the temp file, since we have written
                 * data to temp files previously, and add this temp
                 * file to the files to process */
                temp_file_idx = idx_intermediate;
            } else {
                /* we opened all the input files in a single pass.  we
                 * no longer need the intermediate temp file */
                fclose(tmp_fp);
                tmp_fp = NULL;

                /* create the heap since we opened all input files in
                 * a single pass */
                heap = skHeapCreate(cmp_fn, limit.value[RWSTATS_COUNT],
                                    heap_node_bytes, NULL);
                if (NULL == heap) {
                    skAppPrintErr(("Unable to create heap of %" PRIu64
                                   " %" PRIu32 "-byte elements"),
                                  limit.value[RWSTATS_COUNT],
                                  (uint32_t)heap_node_bytes);
                    appExit(EXIT_FAILURE);
                }
            }
            break;
          case -1:
            /* unexpected error opening a file */
            appExit(EXIT_FAILURE);
          case -2:
            /* ran out of memory or file descriptors */
            TRACEMSG((("Unable to open all inputs---"
                       "out of memory or file handles")));
            /* add this temp file to the files to process */
            temp_file_idx = idx_intermediate;
            break;
          case 0:
            if (open_count == MAX_MERGE_FILES) {
                /* ran out of pointers for this run */
                TRACEMSG((("Unable to open all inputs---"
                           "MAX_MERGE_FILES limit reached")));
                /* add this temp file to the files to process */
                temp_file_idx = idx_intermediate;
                break;
            }
            /* no other way that rv == 0 */
            TRACEMSG(("rv == 0 but open_count is %d. Abort.",
                      open_count));
            skAbort();
          default:
            /* unexpected error */
            TRACEMSG(("Got unexpected rv value = %d", rv));
            skAbortBadCase(rv);
        }

        /* count number of files with data to read */
        read_count = 0;

        /* Read the first record from each file into the work buffer */
        for (i = 0; i < open_count; ++i) {
            if (fillRecordAndKey(fps[i].io, &rwrec[i], node[i])) {
                /* 'reading' holds the indexes of the files from which
                 * we are reading records */
                reading[read_count] = i;
                ++read_count;
            }
        }

        TRACEMSG((("Merging presorted files... open_count: %" PRIu16
                   "; read_count: %" PRIu16),
                  open_count, read_count));

        /* exit this while() once all records for all opened files
         * have been read */
        while (read_count) {
            /* set "lowest" to first file with data. use 'read_idx'
             * to remember this position in the 'reading' array */
            read_idx = 0;
            lowest = reading[read_idx];

            /* compare 'lowest' with every other file to find the
             * actual lowest */
            for (i = 1; i < read_count; ++i) {
                /* If the file's record is lower than the current
                 * lowest... */
                if (memcmp(node[reading[i]], node[lowest], key_byte_width) < 0)
                {
                    /* ...record the index of this file as having the
                     * lowest key */
                    read_idx = i;
                    lowest = reading[read_idx];
                }
            }

            /* cache this low key, initialize the values, then add the
             * values and the distincts from this record */
            memcpy(cached_node, node[lowest], key_byte_width);
            memcpy(merged_values, value_initial_vals, value_byte_width);
            addRecToValues(&rwrec[lowest], merged_values);
            for (d = 0; d < distinct_num_fields; ++d) {
                distinct_fields[d].df_r2k(&rwrec[lowest], &ipv4);
                MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset,
                                   ipv4, distincts[d].dv_count);
            }

            /* replace the record and key we just processed */
            if (!fillRecordAndKey(fps[lowest].io, &rwrec[lowest], node[lowest]))
            {
                /* no more data for this file; replace file with last
                 * file in 'reading' array */
                --read_count;
                reading[read_idx] = reading[read_count];
            }

            /* process all entries in all open input files that match
             * the current key */
            for (i = 0; i < read_count; i += step) {
                step = 1;
                lowest = reading[i];
                while (memcmp(cached_node, node[lowest], key_byte_width) == 0)
                {
                    /* keys are same, add this entry's values */
                    addRecToValues(&rwrec[lowest], merged_values);
                    for (d = 0; d < distinct_num_fields; ++d) {
                        distinct_fields[d].df_r2k(&rwrec[lowest], &ipv4);
                        MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset,
                                           ipv4, distincts[d].dv_count);
                    }

                    /* replace the record */
                    if (!fillRecordAndKey(fps[lowest].io, &rwrec[lowest],
                                          node[lowest]))
                    {
                        /* no more data for this file; replace file
                         * with last file, do not increment 'i' */
                        --read_count;
                        reading[i] = reading[read_count];
                        step = 0;
                        break;
                    }
                }
            }

            /* handle this key and its values.  If we opened all input
             * files, we can add the key/value to the heap.
             * Otherwise, we need to store the key, value, and any
             * distincts into the current temp file.  Once all inputs
             * are processed, we will merge the temporary files. */

            if (tmp_fp) {
                if (presortedNodeToTemp(tmp_fp, cached_node,
                                        merged_values, distincts))
                {
                    skAppPrintSyserror("Unable to write to temporary file '%s'",
                                       skTempFileGetName(idx_intermediate));
                    appExit(EXIT_FAILURE);
                }
            } else {
                /* create a heapnode that we may add to the heap;
                 * reset the distinct values */
                memcpy(newnode_key, cached_node, key_byte_width);
                memcpy(newnode_val, merged_values, value_byte_width);
                for (d = 0; d < distinct_num_fields; ++d) {
                    VAL_FIELD_MEMSET(newnode_val,
                                     distinct_fields[d].df_val_field_pos,
                                     &distincts[d].dv_count);
                    distincts[d].dv_count = 0;
                    skIPTreeRemoveAll(distincts[d].dv_v.dv_ipset);
                }

                ++limit.entries;
                /* maybe insert the key/value into heap */
                if (heap_num_entries < limit.value[0]) {
                    /* there is still room in the heap */
                    skHeapInsert(heap, newnode);
                    ++heap_num_entries;
                    if (heap_num_entries == limit.value[0]) {
                        /* Get the node at the top of heap and its value.
                         * This is the smallest value in the topN. */
                        skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                    }
                } else if (cmp_fn(heap_ptr, newnode) > 0) {
                    skHeapReplaceTop(heap, newnode, NULL);
                    /* the top may have changed; get the new top */
                    skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                }
            }
        } /* inner-while */

        /* Close the input files that we processed this time. */
        for (i = 0; i < open_count; ++i) {
            skStreamDestroy(&fps[i].io);
        }

        /* Close the intermediate temp file. */
        if (tmp_fp) {
            TRACEMSG(("Finished writing #%d '%s'",
                      idx_intermediate, skTempFileGetName(idx_intermediate)));
            if (EOF == fclose(tmp_fp)) {
                skAppPrintSyserror("Error closing temporary file '%s'",
                                   skTempFileGetName(idx_intermediate));
                appExit(EXIT_FAILURE);
            }
            tmp_fp = NULL;
        }

    } while (!no_more_inputs);

    /* If no temporary files were written, jump to the end of this
     * function to generate the output */
    if (-1 == temp_file_idx) {
        goto END;
    }
    /* else need to merge the temporary files */

    /* reset values */
    read_count = 0;
    lowest = 0;
    read_idx = 0;
    memset(reading, 0, sizeof(reading));

    /* index at which to start the merge */
    tmp_idx_a = 0;

    /* This loop repeats as long as there are temporary files to
     * process. */
    while (tmp_idx_a <= temp_file_idx) {
        /* index at which to stop the merge */
        tmp_idx_b = temp_file_idx;

        TRACEMSG(("Attempting to open %d temporary files (#%d...#%d)",
                  (1 + tmp_idx_b - tmp_idx_a), tmp_idx_a, tmp_idx_b));

        /* open an intermediate temp file.  Data will be merged into
         * this new temp file if there are not enough file handles
         * available to open all (remaining) temporary files. */
        tmp_fp = skTempFileCreate(&idx_intermediate, NULL);
        if (NULL == tmp_fp) {
            skAppPrintSyserror("Unable to create temporary file");
            appExit(EXIT_FAILURE);
        }

        open_count = 0;

        /* Attempt to open all temp files, but stop after
         * MAX_MERGE_FILES files, or if we fail due to lack of
         * resources (EMFILE or ENOMEM) */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            fps[open_count].f = skTempFileOpen(j);
            if (fps[open_count].f == NULL) {
                if ((open_count > 0)
                    && ((errno == EMFILE) || (errno == ENOMEM)))
                {
                    /* Blast!  We can't open any more temp files.  So,
                     * we rewind by one to catch this one the next
                     * time around. */
                    tmp_idx_b = j - 1;
                    TRACEMSG((("EMFILE limit hit--"
                               "merging #%d through #%d to #%d"),
                              tmp_idx_a, tmp_idx_b, idx_intermediate));
                    break;
                } else {
                    skAppPrintSyserror("Unable to open temporary file '%s'",
                                       skTempFileGetName(j));
                    appExit(EXIT_FAILURE);
                }
            }

            ++open_count;
            if (open_count == MAX_MERGE_FILES) {
                /* We've reached the limit for this pass.  Set
                 * tmp_idx_b to the file we just opened. */
                tmp_idx_b = j;
                TRACEMSG((("MAX_MERGE_FILES limit hit--"
                           "merging #%d through #%d to #%d"),
                          tmp_idx_a, tmp_idx_b, idx_intermediate));
                break;
            }
        }

        /* Check to see if we've opened all temp files. */
        if (tmp_idx_b != temp_file_idx) {
            /* we could not open all temp files, so merge all opened
             * temp files into the intermediate file, and add the
             * intermediate file to the list of temp files to be
             * merged */
            temp_file_idx = idx_intermediate;
        } else {
            /* we opened all (remaining) temp files.  Close the
             * intermediate temp file, and create the heap. */
            TRACEMSG((("Successfully opened the %" PRIu16
                       " remaining temporary files"),
                      open_count));
            fclose(tmp_fp);
            tmp_fp = NULL;

            heap = skHeapCreate(cmp_fn, limit.value[RWSTATS_COUNT],
                                heap_node_bytes, NULL);
            if (NULL == heap) {
                skAppPrintErr(("Unable to create heap of %" PRIu64
                               " %" PRIu32 "-byte elements"),
                              limit.value[RWSTATS_COUNT],
                              (uint32_t)heap_node_bytes);
                appExit(EXIT_FAILURE);
            }
        }

        /* count number of files with data to read */
        read_count = 0;

        /* Read the first key from each temp file into the work
         * buffers. */
        for (i = 0; i < open_count; ++i) {
            if (!fread(node[i], key_byte_width, 1, fps[i].f)) {
                TRACEMSG(("Could not get first key from file #%d '%s'",
                          tmp_idx_a + i, skTempFileGetName(tmp_idx_a + i)));
                continue;
            }
            /* 'reading' holds the indexes of the files from which we
             * are reading records */
            reading[read_count] = i;
            ++read_count;
        }

        TRACEMSG((("Merging temporary files... open_count: %" PRIu16
                   "; read_count: %" PRIu16 ),
                  open_count, read_count));

        /* when finding lowest key, keep track of which files have
         * identical keys */
        same_key_count = 1;

        /* exit this while() once we have processed all the temporary
         * files we opened this time */
        while (read_count) {
            /* set 'lowest' to the first file with data */
            read_idx = 0;
            lowest = reading[read_idx];

            /* compare "lowest" with every other file to find the
             * actual lowest.  keep track of which files have the same
             * key */
            for (i = 1; i < read_count; ++i) {
                /* If the temp file's key is lower than the current
                 * lowest... */
                cmp = memcmp(node[reading[i]], node[lowest], hash_key_bytes);
                if (cmp < 0) {
                    /* ...record the index of this temp file as having
                     * the lowest key */
                    read_idx = i;
                    lowest = reading[read_idx];
                    same_key_count = 1;
                } else if (cmp == 0) {
                    same_key[same_key_count] = i;
                    ++same_key_count;
                }
            }

            /* if same_key_count is greater than 1, the aggregate
             * values and distincts must be merged, and the result of
             * the merge is either written to a temp file or added to
             * the heap.  if same_key_count is 1, only one file has
             * this key, and we can add it to the heap. */
            if (same_key_count > 1) {
                /* merge multiple values/distincts */

                /* need to include 'lowest' in the same_key[] array */
                same_key[0] = read_idx;

                /* initialize the space we merge into */
                memcpy(merged_values, value_initial_vals, value_byte_width);
                for (d = 0; d < distinct_num_fields; ++d) {
                    skIPTreeRemoveAll(distincts[d].dv_v.dv_ipset);
                    distincts[d].dv_count = 0;
                }

                /* process files with the same key */
                for (i = 0; i < same_key_count; ++i) {
                    FILE *fp = fps[reading[same_key[i]]].f;

                    /* read and merge the value */
                    if (!fread(buf, hash_value_bytes, 1, fp)) {
                        FATAL_READ(tmp_idx_a + reading[same_key[i]]);
                    }
                    mergeValues(merged_values, buf);

                    /* read and merge distincts into IPsets */
                    for (d = 0; d < distinct_num_fields; ++d) {
                        if (!fread(&ip_count, sizeof(uint64_t), 1, fp)) {
                            FATAL_READ(tmp_idx_a + reading[same_key[i]]);
                        }
                        for (k = 0; k < ip_count; ++k) {
                            if (!fread(&ipv4, sizeof(uint32_t), 1, fp)) {
                                FATAL_READ(tmp_idx_a + reading[same_key[i]]);
                            }
                            MEMCHECK_IPSET_ADD(distincts[d].dv_v.dv_ipset,
                                               ipv4, distincts[d].dv_count);
                        }
                    }
                }

                /* write the result */
                if (tmp_fp) {
                    if (presortedNodeToTemp(tmp_fp, node[lowest],
                                            merged_values, distincts))
                    {
                        skAppPrintSyserror(("Unable to write to"
                                            " temporary file '%s'"),
                                           skTempFileGetName(idx_intermediate));
                        appExit(EXIT_FAILURE);
                    }
                } else {
                    /* create a heapnode that we may add to the heap;
                     * reset the distinct values */
                    memcpy(newnode_key, node[lowest], key_byte_width);
                    memcpy(newnode_val, merged_values, value_byte_width);
                    for (d = 0; d < distinct_num_fields; ++d) {
                        VAL_FIELD_MEMSET(newnode_val,
                                         distinct_fields[d].df_val_field_pos,
                                         &distincts[d].dv_count);
                        distincts[d].dv_count = 0;
                        skIPTreeRemoveAll(distincts[d].dv_v.dv_ipset);
                    }

                    ++limit.entries;
                    /* maybe insert the key/value into heap */
                    if (heap_num_entries < limit.value[0]) {
                        /* there is still room in the heap */
                        skHeapInsert(heap, newnode);
                        ++heap_num_entries;
                        if (heap_num_entries == limit.value[0]) {
                            /* Get the node at the top of heap and its value.
                             * This is the smallest value in the topN. */
                            skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                        }
                    } else if (cmp_fn(heap_ptr, newnode) > 0) {
                        skHeapReplaceTop(heap, newnode, NULL);
                        /* the top may have changed; get the new top */
                        skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                    }
                }

                /* replace the key for each of the files. count
                 * backwards to properly handle end-of-file */
                for (i = same_key_count; i > 0; ) {
                    --i;
                    if (!fread(node[reading[same_key[i]]], key_byte_width,
                               1, fps[reading[same_key[i]]].f))
                    {
                        /* no more data for this file; replace file
                         * with last file in 'reading' array */
                        --read_count;
                        reading[same_key[i]] = reading[read_count];
                    }
                }

                same_key_count = 1;

            } else if (tmp_fp) {
                /* need to transfer the bytes from the file we're
                 * reading to the one we are writing */

                /* read the value */
                if (!fread(merged_values, hash_value_bytes, 1, fps[lowest].f)) {
                    FATAL_READ(tmp_idx_a + reading[read_idx]);
                }
                /* write key and value */
                if (!fwrite(node[lowest], key_byte_width, 1, tmp_fp)
                    || !fwrite(merged_values, hash_value_bytes, 1, tmp_fp))
                {
                    FATAL_WRITE(idx_intermediate);
                }
                /* read and write the distincts, if any */
                for (d = 0; d < distinct_num_fields; ++d) {
                    if (!fread(&ip_count, sizeof(uint64_t), 1, fps[lowest].f)) {
                        FATAL_READ(tmp_idx_a + reading[read_idx]);
                    }
                    if (!fwrite(&ip_count, sizeof(uint64_t), 1, tmp_fp)) {
                        FATAL_WRITE(idx_intermediate);
                    }
                    ip_count *= sizeof(uint32_t);
                    while (ip_count) {
                        sz = ((ip_count<sizeof(buf)) ? ip_count : sizeof(buf));
                        if (!fread(buf, sz, 1, fps[lowest].f)) {
                            FATAL_READ(tmp_idx_a + reading[read_idx]);
                        }
                        if (!fwrite(buf, sz, 1, tmp_fp)) {
                            FATAL_WRITE(idx_intermediate);
                        }
                        ip_count -= sz;
                    }
                }

                /* replace the key for this file */
                if (!fread(node[lowest], key_byte_width, 1, fps[lowest].f)) {
                    /* no more data for this file; replace file with last
                     * file in 'reading' array */
                    --read_count;
                    reading[read_idx] = reading[read_count];
                }

            } else {
                /* add this value to the heap */
                memcpy(newnode_key, node[lowest], key_byte_width);

                /* read the value */
                if (!fread(newnode_val, value_byte_width, 1, fps[lowest].f)) {
                    FATAL_READ(tmp_idx_a + reading[read_idx]);
                }

                /* read the distinct counts directly from the input
                 * file, and seek over the actual IPs */
                for (d = 0; d < distinct_num_fields; ++d) {
                    if (!fread(&distincts[d].dv_count, sizeof(uint64_t),
                               1, fps[lowest].f))
                    {
                        FATAL_READ(tmp_idx_a + reading[read_idx]);
                    }
                    ip_count = distincts[d].dv_count * sizeof(uint32_t);
                    if (-1==fseeko(fps[lowest].f, (off_t)ip_count, SEEK_CUR)) {
                        FATAL_SEEK(tmp_idx_a + reading[read_idx]);
                    }
                    VAL_FIELD_MEMSET(newnode_val,
                                     distinct_fields[d].df_val_field_pos,
                                     &distincts[d].dv_count);
                }

                ++limit.entries;
                /* maybe insert the key/value into heap */
                if (heap_num_entries < limit.value[0]) {
                    /* there is still room in the heap */
                    skHeapInsert(heap, newnode);
                    ++heap_num_entries;
                    if (heap_num_entries == limit.value[0]) {
                        /* Get the node at the top of heap and its value.
                         * This is the smallest value in the topN. */
                        skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                    }
                } else if (cmp_fn(heap_ptr, newnode) > 0) {
                    skHeapReplaceTop(heap, newnode, NULL);
                    /* the top may have changed; get the new top */
                    skHeapPeekTop(heap, (skheapnode_t*)&heap_ptr);
                }

                /* replace the key for this file */
                if (!fread(node[lowest], key_byte_width, 1, fps[lowest].f)) {
                    /* no more data for this file; replace file with last
                     * file in 'reading' array */
                    --read_count;
                    reading[read_idx] = reading[read_count];
                }
            }
        }

        TRACEMSG((("Finished processing #%d through #%d"),
                  tmp_idx_a, tmp_idx_b));

        /* Close all of the temp files that we processed this time. */
        for (i = 0; i < open_count; ++i) {
            fclose(fps[i].f);
        }
        /* Delete all the temp files that we opened */
        for (j = tmp_idx_a; j <= tmp_idx_b; ++j) {
            skTempFileRemove(j);
        }

        /* Close the intermediate temp file. */
        if (tmp_fp) {
            TRACEMSG(("Finished writing #%d '%s'",
                      idx_intermediate, skTempFileGetName(idx_intermediate)));
            if (EOF == fclose(tmp_fp)) {
                skAppPrintSyserror("Error closing temporary file '%s'",
                                   skTempFileGetName(idx_intermediate));
                appExit(EXIT_FAILURE);
            }
            tmp_fp = NULL;
        }

        /* Start the next merge with the next temp file */
        tmp_idx_a = tmp_idx_b + 1;
    }

  END:
    /* delete the IPSets */
    for (d = 0; d < distinct_num_fields; ++d) {
        distincts[d].dv_count = 0;
        skIPTreeDelete(&distincts[d].dv_v.dv_ipset);
    }
    if (0 == heap_num_entries) {
        /* no data.  print the header and clean up */
        topnPrintHeader();
        skHeapFree(heap);
    } else {
        /* generate the output and clean up */
        rwstatsPrintHeap(heap);
        skHeapFree(heap);
    }
}


/*
 *  status = topnMain();
 *
 *    Run the statistics computation.  When the global
 *    'app_flags.presorted_input' is set, statsPresorted() is invoked;
 *    otherwise statsHash() is invoked.
 *
 *    Return RWSTATS_NO_MEMORY_EXIT_CODE if the global 'distinct_err'
 *    is non-zero once the computation has finished; return 0
 *    otherwise.
 */
static int topnMain(void)
{
    /* choose the processing routine based on the input ordering */
    if (!app_flags.presorted_input) {
        statsHash();
    } else {
        statsPresorted();
    }

    /* translate the distinct-count error flag into an exit status */
    if (distinct_err) {
        return RWSTATS_NO_MEMORY_EXIT_CODE;
    }
    return 0;
}


/*
 *  Program entry point.
 *
 *    Hand the command line to appSetup(), run protoStatsMain() when
 *    the global 'proto_stats' is set or topnMain() when it is not,
 *    call appTeardown(), and return the status produced by whichever
 *    routine ran.
 */
int main(int argc, char **argv)
{
    int exit_status;

    /* application-wide initialization */
    appSetup(argc, argv);

    /* run the selected mode and remember its result */
    exit_status = (proto_stats ? protoStatsMain() : topnMain());

    /* release resources before handing the status back */
    appTeardown();
    return exit_status;
}


/*
** Local Variables:
** mode:c
** indent-tabs-mode:nil
** c-basic-offset:4
** End:
*/
