/*
 * $Id: ripe2cidr.c 1.2 Thu, 30 Dec 1999 03:55:40 +0100 smurf $
 *
 * Read a network list as extracted from the RIPE DB
 * which contains entries of the form
 * <network>
 * or
 * <network> - <network>
 * or, since CIDR,
 * <address> - <address>
 *
 * For convenience, lines in CIDR format
 * <address>/<bitmask>
 * can also appear in the input, so we can read our own output. This
 * allows for merging of address ranges.
 *
 * Generate a machine-usable list of address/mask pairs from the input,
 * then reduce this list by combining adjacent nets.
 *
 * The original purpose of this program was to read networks.DE
 * (the list of all networks from the RIPE DB that have a country
 * of DE) and generate a minimized list of address/mask pairs to
 * be used in IP accounting to decide whether given traffic is within
 * Germany or international. It can, however, be used for other things
 * as well.
 *
 * This Code is Freeware and is Copyright Andre Beck 1996. Do what you
 * want with it, but leave the copyright intact.
 *
 * If you find bugs, contact me at <beck@ibh-dd.de>
 *
 * $Log: ripe2cidr.c,v $
 * Revision 1.1  1996/11/26 03:03:31  smurf
 * Initial
 *
 * Revision 1.4  1996/07/09  13:05:33  beck
 * Bug fixed with one address ranges
 * Minor typos and warnings fixed
 *
 * Revision 1.3  1996/07/07  11:12:27  beck
 * Added RIPE format output
 * Added CIDR format input
 * Fixed problem in bits2Mask with bits=0
 *
 * Revision 1.2  1996/07/06  22:47:05  beck
 * cleaned up two warnings
 *
 * Revision 1.1  1996/07/06  22:39:08  beck
 * Initial revision
 *
 *
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Integer type used for every IPv4 address and netmask in this file.
   Values are handled in host byte order: they are produced with ntohl()
   and converted back with htonl() in ipntoa(). */
typedef unsigned long uLong;

/* Classful network class as determined by getClass(). */
typedef enum { ClassA, ClassB, ClassC } netClass;

/* Output format for dumpNetList(): address/bits (the default in main()),
   address plus dotted netmask (-m) or RIPE DB style (-r). */
typedef enum { BitMask, DottedMask, RIPE } printFormat;

/* One network, kept as a node of the singly linked list netList. */
typedef struct _ipnet {
 /* following list entry */
 struct _ipnet *next;
 /* base address of the network */
 uLong addr;
 /* netmask that belongs to addr */
 uLong mask;
} IPNet;

/* Head of the list of all networks; addNet() prepends to it. */
IPNet *netList = NULL;
/* Number of list entries; incremented by addNet(), corrected by the
   sorting and optimizing passes. */
long int numNets = 0;
/* Message level: 2 is the default, -q selects 1 and -Q selects 0. */
int verbose = 2;

/*
 * malloc(3) wrapper that never hands NULL back to the caller: if the
 * allocation fails, the problem is reported through perror(3) and the
 * program terminates with exit status 1.
 */
void *smalloc(size_t bytes)
{
 void *ptr = malloc(bytes);

 if(ptr != NULL)
  return ptr;

 perror("can't allocate memory");
 exit(1);
}

/*
 * Classify an address by its two most significant bits (of 32):
 * top bit clear -> ClassA, top bits 10 -> ClassB, top bits 11 -> ClassC.
 * No further distinction is made, so everything with both bits set is
 * reported as ClassC.
 */
netClass getClass(uLong addr)
{
 if(!(addr & 0x80000000))
  return ClassA;

 return (addr & 0x40000000) ? ClassC : ClassB;
}

/*
 * Return the natural netmask of a classful network:
 * ClassA -> 255.0.0.0, ClassB -> 255.255.0.0, ClassC -> 255.255.255.0.
 * Any other value is a programming error; it is reported on stderr and
 * the program exits with status 1.
 */
uLong getMask(netClass nclass)
{
 if(nclass == ClassA)
  return 0xff000000;
 if(nclass == ClassB)
  return 0xffff0000;
 if(nclass == ClassC)
  return 0xffffff00;

 fprintf(stderr, "getMask called with unknown class\n");
 exit(1);
}

/*
 * Format a host-byte-order address as dotted quad text.
 * The value is converted to network byte order and passed to inet_ntoa(3).
 * inet_ntoa() returns a statically allocated buffer, so the result has to
 * be consumed before ipntoa() is called again - which is why callers never
 * use two ipntoa() results within a single printf call.
 */
char *ipntoa(uLong addr)
{
 struct in_addr netaddr;

 netaddr.s_addr = htonl(addr);
 return inet_ntoa(netaddr);
}

/*
 * Count the bits that are set in mask. For a contiguous netmask this is
 * the prefix length.
 */
int mask2Bits(uLong mask)
{
 int bits;

 for(bits = 0; mask != 0; mask >>= 1)
  bits += (int)(mask & 0x01);

 return bits;
}

/*
 * Build the 32 bit netmask that has the upper `bits` bits set.
 *
 * bits <= 0 yields 0 and bits >= 32 yields 0xffffffff, so every argument
 * has a defined result. All arithmetic is done on unsigned long: the old
 * formula shifted the int constant 1, which overflows a signed int for a
 * shift count of 31 (the /1 mask) and is undefined for counts outside
 * 0..31.
 */
uLong bits2Mask(int bits)
{
 if(bits <= 0)
  return 0;
 if(bits >= 32)
  return 0xffffffffUL;

 /* the shift count is 1..31 here; the final AND drops whatever a wider
    unsigned long shifted beyond bit 31 */
 return (0xffffffffUL << (32 - bits)) & 0xffffffffUL;
}

/*
 * Record one network: allocate a list node for addr/mask, put it in front
 * of netList and bump the global network counter. No validation or
 * duplicate detection happens here.
 */
void addNet(uLong addr, uLong mask)
{
 IPNet *node = smalloc(sizeof *node);

 node->next = netList;
 node->addr = addr;
 node->mask = mask;

 netList = node;
 numNets++;
}

/*
 * Helper for splitting ranges such as
 * "62.159.42.160 - 62.159.42.199"
 * that are not a single CIDR block.
 *
 * Returns the number of trailing zero bits that addr1 and addr2+1 have in
 * common, i.e. the exponent of the largest power of two to which both the
 * first address of the range and the address just past its end are
 * aligned. If the combined value is zero, 0 is returned.
 */
int mbits(uLong addr1, uLong addr2)
{
 uLong combined = addr1 | (addr2 + 1);
 int zeros = 0;

 if(combined == 0)
  return 0; /* shouldn't happen */

 for(; (combined & 1) == 0; combined >>= 1)
  zeros++;

 return zeros;
}

void addCIDRRange(uLong addr1, uLong addr2);

/*
 * Split a range that is not a single CIDR block into two smaller ranges
 * and hand both back to addCIDRRange().
 *
 * m is the common alignment of the range start and the address after the
 * range end (see mbits()), so a block of 2^m addresses can be cut off at
 * either end. The end is cut off when that leaves the better aligned
 * remainder (m1 > m2), otherwise the front is cut off.
 *
 * When both choices are equally good, cutting the front is sufficient: the
 * remainder is processed recursively and loses its last block there. The
 * previous code issued two overlapping recursive calls in that case (plus
 * one call whose range was always reversed and therefore ignored), so the
 * interior of the range was decomposed twice per level and the work could
 * grow exponentially, producing only duplicates.
 *
 * The block size is computed as unsigned long to avoid shifting the int
 * constant 1 into or beyond the sign bit.
 */
void gen_masks(uLong addr1, uLong addr2)
{
 int m = mbits(addr1, addr2);
 uLong block = 1UL << m;
 int m1 = mbits(addr1, addr2 - block);	/* alignment after cutting the end */
 int m2 = mbits(addr1 + block, addr2);	/* alignment after cutting the front */

 if(m1 > m2) {
  addCIDRRange(addr1, addr2 - block);
  addCIDRRange(addr2 - block + 1, addr2);
 } else {
  addCIDRRange(addr1, addr1 + block - 1);
  addCIDRRange(addr1 + block, addr2);
 }
}

/*
 * Add all classful nets of class nclass from addr1 up to and including
 * addr2, stepping by the size of one network of that class. At least one
 * net (addr1) is always added.
 *
 * The loop decides whether another step still fits *before* advancing the
 * address. Advancing first and comparing afterwards wraps around to small
 * values when unsigned long is 32 bits wide and the range reaches the top
 * of the address space, which made the loop run forever.
 */
void addNetRange(uLong addr1, uLong addr2, netClass nclass)
{
 uLong incr;
 uLong addr = addr1;
 uLong mask = getMask(nclass);
 long int n = 0;

 /* size of one network of the given class */
 switch(nclass) {
 case ClassA:
  incr = 0x01000000UL;
  break;
 case ClassB:
  incr = 0x00010000UL;
  break;
 default:
  incr = 0x00000100UL;
  break;
 }

 for(;;) {
  addNet(addr, mask);
  n++;
  /* stop when addr + incr would lie beyond addr2 */
  if(addr >= addr2 || addr2 - addr < incr)
   break;
  addr += incr;
 }

 if(verbose > 1) {
  fprintf(stderr, "EXPLODE %s - ", ipntoa(addr1));
  fprintf(stderr, "%s: %ld networks added\n", ipntoa(addr2), n);
 }
}

/*
 * Try to CIDR match an arbitrary address range.
 *
 * A reversed range (addr2 < addr1) is ignored. If the range is exactly one
 * CIDR block - its length is a power of two and addr1 is aligned to that
 * length - it is added as a single net; a one-address range becomes a /32.
 * Everything else is passed to gen_masks() to be split.
 *
 * All mask arithmetic is limited to 32 bits. Complementing an unsigned
 * long that is wider than 32 bits sets its upper bits as well, which used
 * to end up in the stored netmask and in the prefix length derived from
 * it. The power-of-two test no longer loops on span+1, which is zero for
 * the full range 0.0.0.0 - 255.255.255.255 with a 32 bit unsigned long.
 */
void addCIDRRange(uLong addr1, uLong addr2)
{
 uLong span, mask;

 /* ignore a stupid cidr range */
 if(addr2 < addr1)
  return;

 /* host part candidate: all ones below the prefix if this is one block */
 span = (addr2 - addr1) & 0xffffffffUL;

 /* span must look like 0..01..1 and addr1 must have no host bits set */
 if((span & (span + 1)) != 0 || (addr1 & span) != 0) {
  gen_masks(addr1, addr2);
  return;
 }

 mask = ~span & 0xffffffffUL;
 addNet(addr1, mask);
 if(verbose > 1) {
  fprintf(stderr, "CIDR %s - ", ipntoa(addr1));
  fprintf(stderr, "%s: ", ipntoa(addr2));
  fprintf(stderr, "%s/%d\n", ipntoa(addr1), mask2Bits(mask));
 }
}

/* compare routine for qsort */
int cmpAddr(const void *p1, const void *p2)
{
 IPNet *n1 = *((IPNet **)p1);
 IPNet *n2 = *((IPNet **)p2);
 /* when we just subtract the addresses, we get problems due to unsigned
    and signed integers and their value ranges. Thus we compare.
    */
 return (n1->addr > n2->addr ? 1 : (n1->addr == n2->addr ? 0 : -1));
}

/* sort the netList in ascending order using qsort(3) */
void sortNetList()
{
 IPNet *last;
 IPNet **netArray = (IPNet **)smalloc(numNets * sizeof(IPNet));
 IPNet *n = netList;
 long int i = 0;
 long int nn = 1;

 while(n) {
  netArray[i] = n;
  i++;
  n = n->next;
 }
 qsort(netArray, numNets, sizeof(IPNet *), cmpAddr);
 /* Duplicate check and removal. Duplicates _will_ break
    the optimization if not removed. This place is ideal */
 netList = last = netArray[0];
 for(i = 1; i < numNets; i++) {
  if(last->addr == netArray[i]->addr) {
   if(last->mask > netArray[i]->mask) {
    if(verbose) {
     fprintf(stderr, "OVERLAP: %s/%d overlaps ", ipntoa(last->addr),
	     mask2Bits(last->mask));
     fprintf(stderr, "%s/%d (removed)\n", ipntoa(netArray[i]->addr),
	     mask2Bits(netArray[i]->mask));
    }
    last->mask = netArray[i]->mask;
   }
   else {
    if(verbose) {
     fprintf(stderr, "DUPE: %s (removed)\n", ipntoa(last->addr));
    }
   }
  }
  else if((netArray[i]->addr & last->mask) == last->addr) {
   if(verbose) {
    fprintf(stderr, "OVERLAP: %s/%d overlaps ", ipntoa(netArray[i]->addr),
	    mask2Bits(netArray[i]->mask));
    fprintf(stderr, "%s/%d (removed)\n", ipntoa(last->addr),
	    mask2Bits(last->mask));
   }
  }
  else {
   last->next = netArray[i];
   last = netArray[i];
   nn++;
  }
 }
 last->next = NULL;
 free(netArray);
 numNets = nn;
}

/*
 * Combine neighbouring networks of the sorted list into larger ones.
 * Two subsequent list members are merged when
 *  a) they have the same netmask,
 *  b) the first address is 'even' with respect to that netmask, i.e. after
 *     shifting out the host part the lowest remaining bit is zero, and
 *  c) their address ranges touch, which shows as a difference of exactly
 *     one between the two shifted addresses.
 * A merge shortens the mask of the first member by one bit and unlinks the
 * second member. The list is walked again and again until a complete pass
 * finds nothing left to merge.
 */
void optimizeNetList()
{
 int merged;

 do {
  IPNet *cur;

  merged = 0;
  for(cur = netList; cur && cur->next; cur = cur->next) {
   IPNet *succ = cur->next;
   int bits;
   uLong left, right;

   if(cur->mask != succ->mask)
    continue;

   bits = mask2Bits(cur->mask);
   left = cur->addr >> (32-bits);
   right = succ->addr >> (32-bits);
   if((left & 1) || (right - left) != 1)
    continue;

   /* combinable pair: widen cur and take succ out of the list */
   cur->mask = bits2Mask(bits-1);
   cur->next = succ->next;
   numNets--;
   merged = 1;
  }
 } while(merged);
}

/*
 * An address on a line by itself in the RIPE DB will usually mean a
 * classical network. The class is derived from the address and the net is
 * added with the natural mask of that class. If host bits are set, the
 * line is not a classical net; it is ignored and (when verbose) logged.
 *
 * inet_addr(3) signals a parse error by returning (in_addr_t)-1. That
 * value is checked before the conversion to unsigned long: once the 32 bit
 * result has been widened it no longer equals (uLong)-1 where unsigned
 * long has 64 bits, so the error went unnoticed there.
 */
void addRIPENet(const char *line)
{
 in_addr_t raw = inet_addr(line);
 uLong addr;
 uLong mask;

 if(raw == (in_addr_t)-1) {
  /* line still carries its newline, so none is added here */
  if(verbose) fprintf(stderr, "inet_addr(3) cannot parse %s", line);
  return;
 }
 addr = ntohl(raw);

 mask = getMask(getClass(addr));
 if((addr & mask) != addr) {
  if(verbose) {
   fprintf(stderr, "%s is no classical net but appears on a line by itself\n",
	   ipntoa(addr));
  }
  return;
 }
 addNet(addr, mask);
}

/*
 * Handle a line of the form "<address> - <address>". This is the
 * complicated case, due to inconsistencies in the RIPE DB itself. At least
 * these types of ranges are recognized:
 * a) <classical net> - <classical net>
 * b) a.b.c.0 - a.b.x.255
 * c) a.b.c.d - e.f.g.h
 * a) explodes to the list of all classical nets in the range. b) names one
 * or several classical nets by giving the last address of the last net; it
 * can occur in any class, not just C as in the example. c) is an arbitrary
 * address range that is handed to addCIDRRange().
 * A range whose two ends are equal is added as a single /32, and a range
 * whose ends belong to different classes is rejected.
 *
 * Fixes over the earlier version:
 * - inet_addr(3) failure is detected on its in_addr_t result; the check on
 *   the widened value never matched with a 64 bit unsigned long.
 * - A reversed range is now really skipped. It used to be reported only,
 *   and then still added at least one net.
 * - The "all ones host part" test for b) is restricted to 32 bits; the
 *   complement of a 64 bit unsigned long is never zero.
 */
void addRIPERange(const char *line)
{
 uLong addr1, addr2;
 char astr1[32],astr2[32];
 netClass nclass1, nclass2;
 uLong mask1, mask2;
 in_addr_t raw;

 if(sscanf(line, "%16s - %16s", astr1, astr2) != 2) {
  fprintf(stderr, "cannot parse: %s", line);
  return;
 }

 raw = inet_addr(astr1);
 if(raw == (in_addr_t)-1) {
  fprintf(stderr, "inet_addr(3) cannot parse: %s\n", astr1);
  return;
 }
 addr1 = ntohl(raw);

 raw = inet_addr(astr2);
 if(raw == (in_addr_t)-1) {
  fprintf(stderr, "inet_addr(3) cannot parse: %s\n", astr2);
  return;
 }
 addr2 = ntohl(raw);

 if(addr1 > addr2) {
  if(verbose) {
   fprintf(stderr, "stupid range: %s", line);
  }
  return;
 }
 if(addr1 == addr2) {
  addNet(addr1, 0xFFFFFFFF);
  return;
 }

 nclass1 = getClass(addr1);
 nclass2 = getClass(addr2);
 mask1 = getMask(nclass1);
 mask2 = getMask(nclass2);

 /* a range must never have members from different classes */
 if(nclass1 != nclass2) {
  if(verbose) {
   fprintf(stderr, "impossible range: %s", line);
  }
  return;
 }

 /* first address is no classic net: must be a CIDR range */
 if((addr1 & mask1) != addr1) {
  addCIDRRange(addr1, addr2);
  return;
 }

 if((addr2 & mask2) == addr2) {
  /* case a): both ends are classic nets */
  addNetRange(addr1, addr2, nclass1);
 }
 else if(((addr2 | mask2) & 0xffffffffUL) == 0xffffffffUL) {
  /* case b): second address is a classic net with an all ones host part */
  addNetRange(addr1, addr2 & mask2, nclass1);
 }
 else {
  /* a CIDR range that by chance starts on a classic net base */
  addCIDRRange(addr1, addr2);
 }
}

/*
 * Parse a line that is already in CIDR format, "<address>/<bits>", and add
 * the network. The '/' in line is overwritten with a terminator, so the
 * buffer is modified. Lines with an unparsable address, a missing or
 * out-of-range prefix length, or a base address with host bits set are
 * rejected.
 *
 * Fixes over the earlier version:
 * - a line without '/' is rejected instead of writing through NULL,
 * - inet_addr(3) failure is detected on its in_addr_t result (the check on
 *   the widened value never matched with a 64 bit unsigned long),
 * - the prefix length is read with strtol(3) and must contain at least one
 *   digit; atoi(3) turned an empty or non-numeric field into /0.
 */
void addCIDRLine(char *line)
{
 char *delim;
 char *end;
 long bits;
 in_addr_t raw;
 uLong addr, mask;

 delim = strchr(line, '/');
 if(delim == NULL) {
  fprintf(stderr, "cannot parse: %s", line);
  return;
 }
 *delim++ = 0;

 raw = inet_addr(line);
 if(raw == (in_addr_t)-1) {
  fprintf(stderr, "inet_addr(3) cannot parse: %s\n", line);
  return;
 }
 addr = ntohl(raw);

 bits = strtol(delim, &end, 10);
 if(end == delim) {
  /* print the field without the line end it may still carry */
  fprintf(stderr, "cannot parse bitmask: %.*s\n",
	  (int)strcspn(delim, "\r\n"), delim);
  return;
 }

 if(bits < 0 || bits > 32) {
  fprintf(stderr, "bitmask out of range: %ld\n", bits);
  return;
 }

 mask = bits2Mask((int)bits);
 if((addr & mask) != addr) {
  if(verbose) {
   fprintf(stderr, "CIDR %s/%d: bad base address\n", ipntoa(addr), (int)bits);
  }
  return;
 }

 addNet(addr, mask);
}

/*
 * Print every network of netList to stdout in the requested format:
 *  DottedMask  "<address> <dotted netmask>"
 *  BitMask     "<address>/<bits>"
 *  RIPE        "<address>" for a classical net, otherwise
 *              "<first address> - <last address>"
 *
 * In RIPE format a bare address denotes the classical net of the class the
 * address belongs to (that is how addRIPENet() reads it). The short form
 * is therefore only used when the mask is the natural mask of the
 * address's own class. Previously any classful mask selected the short
 * form, so e.g. 192.168.0.0/16 was printed as "192.168.0.0", which reads
 * back as a /24.
 */
void dumpNetList(printFormat pf)
{
 IPNet *n;

 for(n = netList; n != NULL; n = n->next) {
  switch(pf) {
  case DottedMask:
   printf("%s ", ipntoa(n->addr));
   printf("%s\n", ipntoa(n->mask));
   break;
  case BitMask:
   printf("%s/%d\n", ipntoa(n->addr), mask2Bits(n->mask));
   break;
  case RIPE:
   if(n->mask == getMask(getClass(n->addr))) {
    printf("%s\n", ipntoa(n->addr));
   }
   else {
    printf("%s - ", ipntoa(n->addr));
    /* last address: base address with all host bits set (32 bits only) */
    printf("%s\n", ipntoa((n->addr | ~n->mask) & 0xffffffffUL));
   }
   break;
  default:
   break;
  }
 }
}

/*
 * Print the option summary to stderr. progname is shown as the command
 * name in the first line.
 *
 * The -r line was missing its newline, which glued the -n description onto
 * the same output line.
 */
void usage(char *progname)
{
 fprintf(stderr,
	 "Usage: %s <options>\n"
	 "  -m   print dotted netmasks\n"
	 "  -r   print RIPE DB format output\n"
	 "  -n   do not optimize (minimize) the networks\n"
	 "  -q   be quiet (print only errors)\n"
	 "  -Q   be very quiet (print only fatal errors)\n",
	 progname);
}

/*
 * Program entry: parse the options, read the network list from stdin,
 * sort it, optionally minimize it (unless -n) and print it to stdout.
 *
 * Each input line is dispatched by content: a line containing '-' is
 * treated as an address range, otherwise a line containing '/' as CIDR
 * notation, anything else as a single classical network.
 *
 * Returns 0 on success and 1 when no network could be read; an unknown
 * option prints the usage text and exits with status 1.
 *
 * getopt(3) is compared against -1, the value it is specified to return at
 * the end of the options, and the signed counter numNets is printed with
 * %ld instead of %lu.
 */
int main(int argc, char *argv[])
{
 char line[BUFSIZ];
 int c;
 printFormat pf = BitMask;
 int doopt = 1;

 while((c = getopt(argc, argv, "mrnqQ")) != -1) {
  switch(c) {
  case 'm':
   pf = DottedMask;
   break;
  case 'r':
   pf = RIPE;
   break;
  case 'q':
   verbose = 1;
   break;
  case 'Q':
   verbose = 0;
   break;
  case 'n':
   doopt = 0;
   break;
  case '?':
   usage(argv[0]);
   exit(1);
  }
 }

 if(verbose > 1)
  fprintf(stderr, "reading RIPE data...\n");

 while(fgets(line, sizeof line, stdin)) {
  if(strchr(line, '-'))
   addRIPERange(line);
  else if(strchr(line, '/'))
   addCIDRLine(line);
  else
   addRIPENet(line);
 }

 if(verbose > 1)
  fprintf(stderr, "%ld networks found\n", numNets);

 /* nothing usable in the input */
 if(!numNets)
  return 1;

 if(verbose > 1)
  fprintf(stderr, "sorting networks...\n");

 sortNetList();

 if(verbose > 1)
  fprintf(stderr, "%ld networks remaining\n", numNets);

 if(doopt) {
  if(verbose > 1)
   fprintf(stderr, "optimizing networks...\n");

  optimizeNetList();

  if(verbose > 1)
   fprintf(stderr, "%ld networks remaining\n", numNets);
 }

 if(verbose > 1)
  fprintf(stderr, "dumping result list...\n");

 dumpNetList(pf);

 return 0;
}
