#!/usr/bin/env python3

# See:
# indy:/usr/local/ark/cron-jobs/export-monitors-yaml
# https://gitlab.caida.org/CAIDA/ark/ark-toolbox/-/blob/main/pphick/python/ark_dory.py

import argparse
from collections import defaultdict
import json
import os
import sys
import time
import yaml

import caida_oidc_client
from requests_oauthlib import OAuth2Session

# authentication realm name; substituted into AUTH_URL below
REALM = "CAIDA"
# base URL of the API that the monitor list is fetched from
API_URL = "https://api.arkmon.caida.org"
# base URL of the OpenID Connect endpoints; "/token" is appended to this to
# form the URL used for refreshing the access token
AUTH_URL = f"https://auth.caida.org/realms/{REALM}/protocol/openid-connect"
# OAuth2 client id used for the session and sent with token refresh requests
CLIENT_ID = "arkmon-offline"

# YAML document of default monitor settings. It is written verbatim at the
# top of the output, ahead of the per-monitor documents, so only values
# different to the default need to be set per monitor.
MONITOR_DEFAULTS = """
---
.defaults: monitor
ssh_user: ark
ssh_port: 0
node_id: 0
#sudo_su_command: sudo -i -u %%USER%%
scamper_arguments: -P 8743 -p 100 -M %%MONITOR%% -c "trace -P icmp-paris -q 3 -L 1"
scamper_executable: scamper-20070523m
enabled: true
ip_address: 0.0.0.0
ip6_address: "::"
team: 1
os_version: linux-raspbian
as_number: "0"
organization_url: http://www.caida.org/
organization_aup: http://www.caida.org/projects/ark/moc/
hardware_origin: CAIDA
# All activities in 2018-08. Default is false.
activity_boxmap: false
activity_congestion: false
activity_iffinder: false
activity_ioda: false
activity_itm: false
activity_midar: false
activity_ndt: false
activity_perfv4v6: false
activity_prefix_probing: false
activity_spoofer: false
activity_team_probing: false
activity_topo_on_demand: false
activity_topo_v6: false
activity_tracetun: false
activity_youtube: false
activity_vrfinder: false
activity_dns_access: false
"""


def get_hardware_origin(value):
    """Return the hardware origin label for a "contributed" flag.

    Any truthy value yields "Contributed"; any falsy value yields "CAIDA".
    """
    return "Contributed" if value else "CAIDA"


# Conversion functions keyed by output field name, used to massage input
# values into the form wanted in the output. Any field not listed here gets
# an identity function, so its value is passed through unchanged.
transform = defaultdict(
    lambda: (lambda value: value),
    hardware_origin=get_hardware_origin,
    monitor_state=str.lower,
    organization_classification=str.lower,
)

# Map of output field names to the names of the input fields to read them
# from. Input names are listed in order of preference: the first one that has
# a truthy value for a monitor is used, and the output field is omitted when
# none does. I'm not certain which fields are actually used, so it's
# currently safer to use the old names.
fields = {
    "activation_time": ["activation"],
    "activities": ["activities"],
    "as_number": ["asn", "asguess"],
    "contact_admin": ["emailadmin"],
    "contact_tech": ["emailtech"],
    "cpu_arch": ["cpuarch"],
    "dns_server": ["dnsserver"],
    "geographic_latitude": ["latitude"],
    "geographic_longitude": ["longitude"],
    "hardware_origin": ["contributed"],
    "hardware_type": ["hwtype"],
    "mac_address": ["hwmac"],
    "has_ipv6": ["pingipv6"],
    "ip_address": ["ipv4global"],
    "ip6_address": ["ipv6global", "ipv6local"],
    "monitor_state": ["status"],
    "natpport": ["natpport"],
    "ntp_server": ["ntpserver"],
    "organization_aup": ["orgmoc"],
    "organization_classification": ["orgtype"],
    "organization_name": ["orgname", "orgguess"],
    "organization_url": ["orgurl"],
    "serial_number": ["serialnr"],
    "ssh_all": ["sshall"],
    "ssh_host_key_pub": ["sshpubkey"],
    "ssh_one": ["sshnow"],
}


def load_token_info(token_file):
    """Load the saved OAuth2 token from the JSON file *token_file*.

    If the token has an "expires_at" that is still in the future it is
    returned as stored. Otherwise a minimal token is returned that holds only
    the stored refresh token, an "expires_in" of -1 and a placeholder access
    token.
    NOTE(review): presumably this makes the OAuth2 session treat the token as
    expired and refresh it on first use -- confirm against requests_oauthlib.

    Raises KeyError if an expired token has no "refresh_token".
    """
    with open(token_file, "r", encoding="ascii") as handle:
        stored = json.load(handle)

    expired = "expires_at" not in stored or stored["expires_at"] < time.time()
    if not expired:
        return stored

    # everything except the refresh token is stale, so start over from it
    return {
        "refresh_token": stored["refresh_token"],
        "expires_in": -1,
        "access_token": "dummy value for oauthlib",
    }


def _abort(message):
    """Report a fatal error on stderr and exit with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _monitor_record(monitor):
    """Build the per-monitor override mapping for one monitor from the API."""
    record = {}
    # TODO I think every node should have a "node" attribute now and the
    # constructed hostname with a serial number is redundant?
    # .get() so that a record without a "node" key still reaches the fallback
    record[".monitor"] = monitor.get("node") or f"p{monitor['serialnr']}-cc"

    # copy all the fields that just get renamed or slightly tweaked, taking
    # the first input field that has a truthy value
    for output_key, input_keys in fields.items():
        for input_key in input_keys:
            if value := monitor.get(input_key):
                record[output_key] = transform[output_key](value)
                break

    # add the fields that are slightly more complicated
    location = [monitor[k] for k in ("city", "state", "country") if monitor.get(k)]
    if location:
        record["geographic_location"] = ", ".join(location)

    # anything that is not explicitly active (including monitors with no
    # status at all) is disabled
    if record.get("monitor_state") != "active":
        record["enabled"] = False

    return record


def _render_monitors(monitors):
    """Return the complete output text: defaults, then one document per monitor."""
    # global defaults
    parts = [MONITOR_DEFAULTS, "\n"]
    # per monitor overrides
    for monitor in monitors:
        parts.append("---\n")
        parts.append(yaml.dump(_monitor_record(monitor), default_flow_style=False))
        parts.append("\n")
    return "".join(parts)


def main():
    """Fetch the monitor list from the API and write it out as YAML.

    The output is the MONITOR_DEFAULTS document followed by one YAML document
    per monitor. It is rendered completely in memory and the output file is
    only opened after that has succeeded, so a failed run leaves an existing
    output file untouched. Error messages go to stderr and the process exits
    with status 1 on failure.
    """
    parser = argparse.ArgumentParser(
        description="Export monitor list from dory")
    parser.add_argument("-t", "--token-file",
        default="/etc/ark/.arkmon-offline.token",
        help="name of file containing offline token (default: /etc/ark/.arkmon-offline.token)")
    # Take the filename as a plain string: argparse.FileType("w") would open
    # (and truncate) the file while parsing arguments, before we know whether
    # there is anything to write. As with FileType, "-" means stdout.
    parser.add_argument("filename", nargs="?", default="-",
            help="output file to write to (default: stdout)")
    args = parser.parse_args()

    token_info = load_token_info(args.token_file)
    save_tokens = caida_oidc_client.make_save_tokens(args.token_file)

    # establish a new session, refreshing access token if necessary
    session = OAuth2Session(client_id=CLIENT_ID,
                token=token_info,
                auto_refresh_url=f"{AUTH_URL}/token",
                auto_refresh_kwargs={"client_id": CLIENT_ID},
                token_updater=save_tokens)

    if not session.authorized:
        _abort("Failed to authorize session, aborting")

    try:
        response = session.request("GET", f"{API_URL}/monitors/")
    except Exception as e:
        # deliberately broad: this is the top-level boundary, and both the
        # token refresh and the request itself can fail here
        _abort(f"Failed to fetch {API_URL}/monitors: {e}")

    if response.status_code != 200:
        _abort(f"Got status code {response.status_code}, aborting")

    try:
        monitors = response.json()
    except ValueError as e:
        _abort(f"Failed to decode response from {API_URL}/monitors: {e}")

    if not monitors:
        _abort("Empty monitors list, aborting")

    text = _render_monitors(monitors)

    if args.filename == "-":
        sys.stdout.write(text)
        return

    try:
        with open(args.filename, "w") as out_file:
            out_file.write(text)
    except OSError as e:
        _abort(f"Failed to write {args.filename}: {e}")


# run the export only when executed as a script, not when imported
if __name__ == "__main__":
    main()
