#!/usr/bin/env python3

import os
import re
import subprocess
import sys

# Note on hyperthreading: on Xeon CPUs that support SGX2, each physical
# core supports two hyperthreads (if enabled in the BIOS), and appears
# as two virtual cores to the system (in /proc/cpuinfo, for example).
# The two virtual cores that map to the same physical core are called
# the "A side" and the "B side".  Each physical core only has one AES
# circuit, so if both hyperthreads on the same core are trying to do AES
# at the same time, performance crashes.  So by default, we only use one
# hyperthread (the "A side") on a given physical core.

# Get the list of virtual cores currently available to us, taken from
# the "physcpubind:" line of the output of `numactl -s`.  Returns a list
# of ints.  If numactl cannot be started, exits with a nonzero status,
# or produces output with no parsable physcpubind line, print a message
# to stderr and exit with status 1.
def virtual_cores_available():
    try:
        ret = subprocess.run(["numactl", "-s"], capture_output=True)
    except OSError:
        # numactl is missing or not executable; report it the same way
        # as a failed run instead of dying with a traceback
        print("Unable to run numactl", file=sys.stderr)
        sys.exit(1)
    if ret.returncode != 0:
        print("Unable to run numactl", file=sys.stderr)
        sys.exit(1)
    # Search the decoded text rather than the repr of the bytes object
    output = ret.stdout.decode(errors="replace")
    match = re.search(r'physcpubind: ((\d+ )*\d+)', output)
    if match is None:
        print("Could not parse numactl output", file=sys.stderr)
        sys.exit(1)
    return [int(core) for core in match.group(1).split(' ')]

# Read /proc/cpuinfo to get a map from virtual core number ("processor")
# to a pair of CPU number ("physical id") and physical core ("core id").
# Only the virtual cores reported by virtual_cores_available() are
# tracked.  Each stanza of /proc/cpuinfo is terminated by a blank line;
# if a stanza ends without all three fields having been seen, print a
# diagnostic to stderr and exit with status 1.
def get_core_map():
    # A set makes the per-stanza membership test constant time
    cores_available = set(virtual_cores_available())
    coremap = {}

    with open("/proc/cpuinfo") as p:
        virtcore = None
        cpuid = None
        coreid = None
        for line in p:
            if line == "\n":
                # End of the stanza for one virtual core
                if virtcore is None or cpuid is None or coreid is None:
                    # Keep the diagnostics on stderr so that stdout
                    # carries only the script's real output
                    print(virtcore, cpuid, coreid, file=sys.stderr)
                    print("Could not parse /proc/cpuinfo", file=sys.stderr)
                    sys.exit(1)
                if virtcore in cores_available:
                    coremap[virtcore] = (cpuid, coreid)
                # Reset for the next stanza
                virtcore = None
                cpuid = None
                coreid = None
            elif match := re.match(r'processor\s*: (\d+)', line):
                virtcore = int(match.group(1))
            elif match := re.match(r'physical id\s*: (\d+)', line):
                cpuid = int(match.group(1))
            elif match := re.match(r'core id\s*: (\d+)', line):
                coreid = int(match.group(1))
    return coremap

# Build the list of hyperthread "sides".  Entry 0 holds the "A sides",
# entry 1 the "B sides", and so on for CPUs with more than two
# hyperthreads per physical core.  Every entry is a map from cpuid to
# the list of available virtual cores on that CPU, with no physical
# core contributing more than one virtual core to a given entry.
def get_core_layout():
    unassigned = get_core_map()
    sides = []
    while unassigned:
        # For this side, claim the first remaining virtual core of each
        # (cpuid, physical core) pair and take it out of the pool
        claimed = {}
        for vcore, (cpuid, coreid) in list(unassigned.items()):
            per_cpu = claimed.setdefault(cpuid, {})
            if coreid in per_cpu:
                # This physical core already has a virtual core on this
                # side; leave this one for a later side
                continue
            per_cpu[coreid] = vcore
            del unassigned[vcore]

        sides.append({cpuid: list(by_core.values())
            for cpuid, by_core in claimed.items()})
    return sides

# The hyperthread-side layout of the virtual cores available to us (see
# get_core_layout).  This is computed once, when the module is loaded,
# so loading this module runs numactl and reads /proc/cpuinfo.
core_layout = get_core_layout()

# Maximum number of cores to use for clients (but we'll prefer to use
# fewer cores instead of overloading cores)
CLIENT_MAX_CORES = 8

# Return a core allocation for an experiment.  Pass in the number of
# servers, and the number of cores per server.  The return value is a
# pair.  The first element is a list of length num_servers, each
# element of which is the core allocation for one server (which will be
# a list of length cores_per_server).  The second element of the return
# value is the core allocation for the clients (which will be a list of
# length between 1 and CLIENT_MAX_CORES).  If the environment variable
# OVERLOAD_CORES is unset or set to 0, each available physical core will
# be used at most once.  If that is not possible, (None, None) will be
# returned.  If OVERLOAD_CORES is set to 1 (or "true" or "t", in any
# case), then physical cores will be reused when necessary in order to
# run the requested experiment, albeit at a significant performance
# penalty.  Even then, a server can only be placed on a CPU with at
# least cores_per_server physical cores; if no such CPU exists (or no
# cores are available at all), (None, None) is returned.
def core_allocation(num_servers, cores_per_server):
    overload_cores = \
        os.getenv("OVERLOAD_CORES", '0').lower() in ('true', '1', 't')

    # With no cores available at all, no allocation is possible
    if not core_layout:
        return (None, None)

    servers_allocation = []
    client_allocation = []

    # Which index into core_layout we are currently working with
    hyperthread_side = 0

    # Copy that entry of the core_layout.  The copy is shallow, so the
    # lists inside are only ever replaced by slices, never modified.
    current_cores = dict(core_layout[hyperthread_side])

    while len(servers_allocation) < num_servers or \
            len(client_allocation) < CLIENT_MAX_CORES:
        need_server = len(servers_allocation) < num_servers
        need_client = len(client_allocation) < CLIENT_MAX_CORES

        # Find the cpu with the most cores still available (the first
        # such cpu in case of a tie), or None if no cores are left
        cpu_most_cores = max(
            (cpuid for cpuid in current_cores if current_cores[cpuid]),
            key=lambda cpuid: len(current_cores[cpuid]),
            default=None)

        if cpu_most_cores is not None:
            free_cores = current_cores[cpu_most_cores]

            if need_server and len(free_cores) >= cores_per_server:
                servers_allocation.append(free_cores[0:cores_per_server])
                current_cores[cpu_most_cores] = \
                    free_cores[cores_per_server:]
                continue

            # We could not find a suitable allocation for the next
            # server.  Try allocating a core for clients, if we still
            # could use some.
            if need_client:
                client_allocation.append(free_cores[0])
                current_cores[cpu_most_cores] = free_cores[1:]
                continue

        # We can't do an allocation.  If we have all the server
        # allocations, and at least one client core allocated, that'll
        # be good enough.
        if len(servers_allocation) == num_servers and \
                len(client_allocation) >= 1:
            break

        # We're going to have to overload cores, if allowed
        if not overload_cores:
            return (None, None)

        # Cycling through the hyperthread sides only helps a pending
        # server if some side has a CPU big enough to hold it.  If none
        # does, give up instead of cycling forever.
        if need_server and not any(
                len(cores) >= cores_per_server
                for side in core_layout
                for cores in side.values()):
            return (None, None)

        hyperthread_side = (hyperthread_side + 1) % len(core_layout)
        # Copy that entry of the core_layout
        current_cores = dict(core_layout[hyperthread_side])

    return (servers_allocation, client_allocation)


if __name__ == "__main__":
    # Optional positional arguments: the number of servers (default 4),
    # then the number of cores per server (default 1)
    cli_args = sys.argv[1:]
    num_servers = int(cli_args[0]) if len(cli_args) >= 1 else 4
    cores_per_server = int(cli_args[1]) if len(cli_args) >= 2 else 1
    print(core_allocation(num_servers,cores_per_server))