#!/usr/bin/env python3

# move completed prefix-probing files from staging into their final location

from datetime import datetime
from pathlib import Path
import re
import shutil
import sys

from kafka import KafkaConsumer

# Root of the final archive; files are filed beneath it as <yyyy>/<mm>/.
DATA_PATH = Path("/data/topology/ark/data/prefix-probing")

# Root of the staging area; each monitor's files sit in its own subdirectory.
STAGING_PATH = Path("/staging/prefix-probing")

# Leading part of a data filename: a short lowercase name, a dot, then the
# four-digit year and two-digit month.  It is applied with re.match(), so
# only the start of the filename has to conform.
FILENAME_REGEX = re.compile(r"[a-z][a-z0-9-]{0,31}\.(?P<yyyy>[0-9]{4})(?P<mm>[0-9]{2})")

# Append-only logs recording each file that has been archived.
LOG_PATH = STAGING_PATH / "download.log"
COMPLETION_LOG = DATA_PATH / "file-completion.log"

class Archiver:
    """Move completed prefix-probing files from staging into the archive.

    Reads ``monitor|activity|filename`` notifications from the
    ``ark-downloaded-data`` Kafka topic.  For every ``prefix-probing``
    notification the named file is moved from ``STAGING_PATH/<monitor>/``
    to ``DATA_PATH/<yyyy>/<mm>/`` and the move is recorded in the two
    log files.
    """

    def __init__(self):
        """Set up the Kafka consumer used by :meth:`run`."""
        self._connect()

    def _connect(self):
        """Create the Kafka consumer and store it in ``self._source``.

        Auto-commit is disabled; offsets are committed explicitly by
        :meth:`run` after each message has been handled.
        """
        # this is good enough for our threat model for now
        server = "localhost:9092"
        username = "ark-prefix-probing-archiver"
        password = "ark-prefix-probing-archiver"

        print(f"Connecting to kafka server {server}")

        self._source = KafkaConsumer(
            "ark-downloaded-data",
            bootstrap_servers=server,
            auto_offset_reset="earliest",
            enable_auto_commit=False,
            client_id="ark-prefix-probing-archiver",
            group_id="ark-prefix-probing-archiver",
            security_protocol="SASL_PLAINTEXT",
            sasl_mechanism="SCRAM-SHA-256",
            sasl_plain_username=username,
            sasl_plain_password=password)

    @staticmethod
    def _is_plain_component(name):
        """Return True if *name* is usable as a single path component.

        Empty names, ``.`` and ``..``, and names containing a ``/`` or a
        NUL byte are rejected.
        """
        return (name not in ("", ".", "..")
                and "/" not in name
                and "\0" not in name)

    def _process_message(self, msg):
        """Archive the file named by one Kafka message.

        The payload must decode to ``monitor|activity|filename``.
        Messages that are malformed, are for another activity, carry an
        unsafe monitor/filename, or whose filename does not match
        ``FILENAME_REGEX`` are skipped.  A file that already exists at
        the destination is left untouched and not logged again.

        Args:
            msg: consumed Kafka record; only ``msg.value`` (bytes) is read.

        Raises:
            SystemExit: if the destination directory cannot be created
                or the file cannot be moved.
        """
        try:
            monitor, activity, filename = msg.value.decode().split("|")
        except ValueError:
            # wrong number of fields, or undecodable bytes
            # (UnicodeDecodeError is a subclass of ValueError)
            print(f"Invalid message payload: {msg.value}")
            return

        if activity != "prefix-probing":
            return

        print(msg.value)

        # Both values come straight from the message and are joined into
        # filesystem paths below.  The regex only anchors the start of the
        # filename and the monitor is not checked by it at all, so refuse
        # anything that could step outside the staging/archive trees
        # (separators, "..", or an absolute path replacing the base).
        if not (self._is_plain_component(monitor)
                and self._is_plain_component(filename)):
            print(f"Unsafe path component in message payload: {msg.value}")
            return

        matches = FILENAME_REGEX.match(filename)
        if matches is None:
            return

        year = matches.group("yyyy")
        month = matches.group("mm")

        final_path = DATA_PATH / year / month

        # create the final directory if it doesn't already exist
        try:
            final_path.mkdir(parents=True, exist_ok=True)
        except Exception as exc: # pylint: disable=broad-except
            print(f'could not create final directory: {exc}')
            sys.exit(1)

        # move the staged file into its final location
        src = STAGING_PATH/monitor/filename
        dst = final_path/filename
        if not dst.exists():
            try:
                # cross device, so can't use os.rename()
                shutil.move(src, dst)
            except Exception as exc: # pylint: disable=broad-except
                print(f'could not move {src} to {dst}: {exc}')
                sys.exit(1)
        else:
            print(f'Skipped rename of {src} to {dst}: already exists')
            return

        # TODO does anything actually use these log files?
        # TODO truncate to last 200 lines of completion log
        # TODO maybe easier to trim from 250 or 300 down to 200, so it
        # doesn't happen every single write?
        with open(LOG_PATH, "a", encoding="utf-8") as outfile:
            outfile.write(f"{year}/{month}/{filename}\n")
        with open(COMPLETION_LOG, "a", encoding="utf-8") as outfile:
            # completion log lines are prefixed with the epoch time in seconds
            now = int(datetime.now().timestamp())
            outfile.write(f"{now}      {year}/{month}/{filename}\n")

    def run(self):
        """Loop forever retrieving and processing messages."""
        print("Running archiver")
        for message in self._source:
            self._process_message(message)
            # commit only once the message has been fully handled
            self._source.commit()


if __name__ == "__main__":
    # Script entry point: build the archiver (which sets up its Kafka
    # consumer) and process messages until the process is stopped.
    Archiver().run()
