#! /usr/bin/python3

import re
import sys
import os
import random
import ipaddress
import gzip
import radix

def _usage():
    """Print the command-line usage summary to standard output.

    This is a plain module-level function.  It must not be wrapped in
    ``staticmethod``: outside a class body that wrapper is not callable
    on Python versions before 3.10, which would make the argument-error
    path of the script fail with a TypeError instead of printing help.
    """
    print(
        "usage: ark-team-probing-targets-generator $dnp $prefs $size $dir $cycle\n"
        "       dnp: file containing prefixes to not probe\n"
        "       prefs: gzip-compressed file containing routed prefixes\n"
        "       size:  the number of IP addresses per chunk\n"
        "       dir:   directory to write each file to\n"
        "       cycle: the cycle id these targets belong to\n")

def _read_dnp(dnpfile, prefre):
    """Load the do-not-probe prefixes from *dnpfile* into a radix tree.

    Lines that do not match *prefre* ("<dotted-quad> <length>") are
    skipped.  Prefixes are parsed with ``strict=False`` so host bits are
    masked off rather than rejected.  Raises ValueError if a matching
    line still does not form a valid IPv4 network.
    """
    dnp = radix.Radix()
    with open(dnpfile, encoding="utf-8") as f:
        for line in f:
            match = prefre.match(line)
            if not match:
                continue
            net, netlen = match.groups()
            pref = ipaddress.ip_network(f"{net}/{netlen}", strict=False)
            dnp.add(str(pref))
    return dnp


def _collect_slash24s(preffile, prefre, dnp):
    """Return a radix tree of every probeable /24 in the routed prefixes.

    *preffile* is a gzip-compressed text file.  Only prefixes with a
    length between 8 and 24 inclusive are used; each is split into its
    /24 subnets, and a /24 is dropped when ``dnp.search_best`` finds a
    do-not-probe entry for it.  Prefixes are parsed with ``strict=True``,
    so a line with host bits set raises ValueError.
    """
    slash24s = radix.Radix()
    with gzip.open(preffile, mode='rt', encoding="utf-8") as f:
        for line in f:
            match = prefre.match(line)
            if not match:
                continue
            net = match.group(1)
            netlen = int(match.group(2))
            if netlen < 8 or netlen > 24:
                continue
            pref = ipaddress.ip_network(f"{net}/{netlen}", strict=True)
            # subnets() enumerates the aligned /24s with integer
            # arithmetic; a /24 prefix yields just itself.
            for slash24 in pref.subnets(new_prefix=24):
                key = str(slash24)
                if dnp.search_best(key):
                    continue
                # adding to a radix tree also de-duplicates /24s that
                # are covered by more than one routed prefix.
                slash24s.add(key)
    return slash24s


def _pick_targets(slash24s, dnp):
    """Choose one random address from each /24 in *slash24s*.

    Host offsets 0 and 255 (theoretical network and broadcast) are never
    chosen.  Offsets are tried in random order and the first address for
    which ``dnp.search_best`` finds nothing is kept; a /24 with no such
    address contributes no target.
    """
    addrs = []
    for rnode in slash24s:
        net_addr = ipaddress.ip_address(rnode.network)
        offs = list(range(1, 255))
        random.shuffle(offs)
        for off in offs:
            candidate = f"{net_addr + off}"
            if not dnp.search_best(candidate):
                addrs.append(candidate)
                break
    return addrs


def _write_chunks(addrs, maxcount, dirname, cycle):
    """Write *addrs*, one per line, into numbered files under *dirname*.

    Files are named ``targets.c<cycle>.<filenum>`` with both numbers
    zero-padded to six digits, and each holds at most *maxcount*
    addresses.  No file is created when *addrs* is empty.
    """
    # A non-positive chunk size is treated as "no limit": everything
    # goes into a single file.
    step = maxcount if maxcount > 0 else max(len(addrs), 1)
    for filenum, start in enumerate(range(0, len(addrs), step)):
        path = f"{dirname}/targets.c{cycle:06}.{filenum:06}"
        # the context manager closes the file even if a write fails
        with open(path, mode='w', encoding="utf-8") as outfile:
            outfile.writelines(
                f"{addr}\n" for addr in addrs[start:start + step])


def _main(dnpfile, preffile, maxcount, dirname, cycle):
    """Generate randomized probing-target files for one cycle.

    Parameters:
        dnpfile:  path of the text file listing prefixes to not probe.
        preffile: path of the gzip-compressed routed-prefix file.
        maxcount: maximum number of addresses written to each file.
        dirname:  directory the target files are written to.
        cycle:    integer cycle id used in the output file names.

    Returns 0 on success.  Raises ValueError for a prefix line that
    matches the expected layout but is not a valid network, and OSError
    if a file cannot be read or written.
    """
    prefre = re.compile(r"^(\d+\.\d+\.\d+\.\d+)\s+(\d+)")

    # read the do-not-probe list
    dnp = _read_dnp(dnpfile, prefre)

    # read the prefix list and break it into probeable /24s
    slash24s = _collect_slash24s(preffile, prefre, dnp)

    # determine a random address in each /24, excluding theoretical
    # network + broadcast, and any in the DNP list.
    addrs = _pick_targets(slash24s, dnp)

    # write a series of files that contain the defined number of
    # addresses in them, in random order
    random.shuffle(addrs)
    _write_chunks(addrs, maxcount, dirname, cycle)
    return 0

if __name__ == "__main__":
    # Validate the command line before doing any work.  The checks are
    # evaluated left to right and stop at the first failure, so the
    # int() conversion only runs on an argument already known to be
    # all digits.
    argv = sys.argv
    args_ok = (
        len(argv) == 6
        # dnp file must exist
        and os.path.isfile(argv[1])
        # prefix file must exist and be named *.gz
        and os.path.isfile(argv[2])
        and re.search("\\.gz$", argv[2]) is not None
        # chunk size must be a number that is at least 1500
        and re.match("^\\d+$", argv[3]) is not None
        and int(argv[3]) >= 1500
        # output directory must exist
        and os.path.isdir(argv[4])
        # cycle id must be a number
        and re.match("^\\d+$", argv[5]) is not None
    )
    if not args_ok:
        _usage()
        sys.exit(-1)

    dnp_path, pref_path, chunk_size, out_dir, cycle_id = argv[1:]
    status = _main(dnp_path, pref_path, int(chunk_size), out_dir, int(cycle_id))
    sys.exit(status)
