#!/usr/bin/env python3

import os
import re
import subprocess
import sys

# Note on hyperthreading: on Xeon CPUs that support SGX2, each physical
# core supports two hyperthreads (if enabled in the BIOS), and appears
# as two virtual cores to the system (in /proc/cpuinfo, for example).
# The two virtual cores that map to the same physical core are called
# the "A side" and the "B side".  Each physical core only has one AES
# circuit, so if both hyperthreads on the same core are trying to do AES
# at the same time, performance crashes.  So by default, we only use one
# hyperthread (the "A side") on a given physical core.


# Get the list of virtual cores currently available to us
def virtual_cores_available():
    """Return the virtual core numbers listed by ``numactl -s``.

    The numbers are taken from the ``physcpubind:`` line of the output.

    Returns:
        A list of ints, in the order numactl printed them.

    Exits the process with status 1 (after printing a message to stderr)
    if numactl cannot be started, returns a non-zero status, or prints
    no ``physcpubind:`` line.
    """
    try:
        ret = subprocess.run(["numactl", "-s"], capture_output=True)
    except OSError:
        # numactl is missing or not executable; report it the same way
        # as a failing numactl instead of dying with a traceback.
        ret = None
    if ret is None or ret.returncode != 0:
        print("Unable to run numactl", file=sys.stderr)
        sys.exit(1)
    output = ret.stdout.decode("utf-8", errors="replace")
    match = re.search(r'physcpubind: ((\d+ )*\d+)', output)
    if match is None:
        print("Could not parse numactl output", file=sys.stderr)
        sys.exit(1)
    return [int(core) for core in match.group(1).split(' ')]


# Read /proc/cpuinfo to get a map from virtual core number to CPU number
# ("physical id") and physical core ("core id").  Only track the cores
# that are currently available
def get_core_map(cpuinfo_path="/proc/cpuinfo", cores_available=None):
    """Map each available virtual core to its (cpuid, coreid) pair.

    Args:
        cpuinfo_path: File to parse, in /proc/cpuinfo format (records
            separated by blank lines, with "processor", "physical id"
            and "core id" fields).
        cores_available: Iterable of virtual core numbers to keep.  When
            None, the result of virtual_cores_available() is used.

    Returns:
        A dict from virtual core number to (physical id, core id), in
        the order the processors appear in the file.

    Exits the process with status 1 (after printing a message to stderr)
    if a blank-line-terminated record lacks any of the three fields.
    """
    if cores_available is None:
        cores_available = virtual_cores_available()
    # A set makes the per-record membership test O(1)
    available = set(cores_available)

    coremap = {}
    virtcore = cpuid = coreid = None
    with open(cpuinfo_path) as cpuinfo:
        for line in cpuinfo:
            if line == "\n":
                # A blank line ends the current processor record
                if virtcore is None or cpuid is None or coreid is None:
                    print(virtcore, cpuid, coreid, file=sys.stderr)
                    print(f"Could not parse {cpuinfo_path}",
                          file=sys.stderr)
                    sys.exit(1)
                if virtcore in available:
                    coremap[virtcore] = (cpuid, coreid)
                virtcore = cpuid = coreid = None
            elif match := re.match(r'processor\s*: (\d+)', line):
                virtcore = int(match.group(1))
            elif match := re.match(r'physical id\s*: (\d+)', line):
                cpuid = int(match.group(1))
            elif match := re.match(r'core id\s*: (\d+)', line):
                coreid = int(match.group(1))

    # Keep a complete final record even if the file does not end with a
    # blank line
    if virtcore is not None and cpuid is not None and coreid is not None \
            and virtcore in available:
        coremap[virtcore] = (cpuid, coreid)

    return coremap


# Return an array.
# The first element of the array will represent the "A
# sides", the second will represent the "B sides" (and if you're on some
# weird CPU with more than 2 hyperthreads per core, the next will be the
# "C sides", etc.).  Each element will be a map from cpuid to a list of
# the available virtual cores on that CPU, at most one per physical
# core.
def get_core_layout():
    """Group the available virtual cores into hyperthread "sides".

    Returns:
        A list of dicts.  Each dict maps a cpuid to a list of virtual
        core numbers on that CPU, holding at most one virtual core per
        physical core.  Entry 0 holds the first virtual core found for
        every physical core, entry 1 the second, and so on until every
        virtual core returned by get_core_map() has been placed.
    """
    core_map = get_core_map()
    retarray = []
    while core_map:
        # Extract the first virtual core for each (cpuid, physical core)
        # from core_map
        first_vcore = {}
        for vcore, physical_core in core_map.items():
            first_vcore.setdefault(physical_core, vcore)

        # Group the chosen virtual cores by cpuid and remove them from
        # core_map so that the next pass picks up the next hyperthread
        current_side = {}
        for (cpuid, _coreid), vcore in first_vcore.items():
            current_side.setdefault(cpuid, []).append(vcore)
            del core_map[vcore]
        retarray.append(current_side)
    return retarray


core_layout = get_core_layout()

# Maximum number of cores to use for clients (but we'll prefer to use
# fewer cores instead of overloading cores)
CLIENT_MAX_CORES = 8

# Return a core allocation for an experiment.  Pass in the number of
# servers, and the number of cores per server.  The return value is a
# pair.  The first element is a list of length num_servers, each
# element of which is the core allocation for one server (which will be
# a list of length cores_per_server).  The second element of the return
# value is the core allocation for the clients (which will be a list of
# length between 1 and CLIENT_MAX_CORES).  If the environment variable
# OVERLOAD_CORES is unset or set to 0, each available physical core will
# be used at most once.  If that is not possible, (None, None) will be
# returned.  If OVERLOAD_CORES is set to 1, then physical cores will be
# reused when necessary in order to run the requested experiment, albeit
# at a significant performance penalty.
# It must be the case in any event that you have at least one CPU with
# at least cores_per_server physical cores; if you do not, (None, None)
# is returned.
def core_allocation(num_servers, cores_per_server, layout=None,
                    max_client_cores=None):
    """Choose virtual cores for the servers and clients of an experiment.

    Args:
        num_servers: Number of servers to allocate cores for.
        cores_per_server: Number of cores each server receives; all of a
            server's cores come from the same CPU.
        layout: Hyperthread-side layout in the format returned by
            get_core_layout().  Defaults to the module-level core_layout.
        max_client_cores: Upper bound on the number of client cores.
            Defaults to CLIENT_MAX_CORES.

    Returns:
        A pair (servers_allocation, client_allocation).  The first is a
        list of num_servers lists of cores_per_server virtual cores; the
        second is a list of between 1 and max_client_cores virtual
        cores.  (None, None) is returned when the request cannot be
        satisfied: either cores would have to be reused and the
        OVERLOAD_CORES environment variable is not one of "1", "t" or
        "true" (case-insensitive), or the layout is empty, or no CPU has
        cores_per_server cores.
    """
    if layout is None:
        layout = core_layout
    if max_client_cores is None:
        max_client_cores = CLIENT_MAX_CORES

    overload_cores = \
        os.getenv("OVERLOAD_CORES", '0').lower() in ('true', '1', 't')

    # Bail out early if the request can never be satisfied.  Without
    # this check, overload mode would cycle through the hyperthread
    # sides forever, and an empty layout would raise IndexError.
    largest_cpu = max(
        (len(cores) for side in layout for cores in side.values()),
        default=0)
    if largest_cpu == 0 or \
            (num_servers > 0 and cores_per_server > largest_cpu):
        return (None, None)

    servers_allocation = []
    client_allocation = []
    # Which index into layout we are currently working with
    hyperthread_side = 0
    # Copy that entry of the layout.  A shallow copy is enough: the
    # per-CPU lists are replaced by slices below, never mutated.
    current_cores = dict(layout[hyperthread_side])

    while len(servers_allocation) < num_servers or \
            len(client_allocation) < max_client_cores:
        # Find the cpu with the most cores available (the first such cpu
        # wins ties)
        candidates = [cpu for cpu, cores in current_cores.items() if cores]
        if candidates:
            best_cpu = max(candidates,
                           key=lambda cpu: len(current_cores[cpu]))
            best_cores = current_cores[best_cpu]

            if len(servers_allocation) < num_servers and \
                    len(best_cores) >= cores_per_server:
                servers_allocation.append(best_cores[:cores_per_server])
                current_cores[best_cpu] = best_cores[cores_per_server:]
                continue

            # We could not find a suitable allocation for the next
            # server.  Try allocating a core for clients, if we still
            # could use some.
            if len(client_allocation) < max_client_cores:
                client_allocation.append(best_cores[0])
                current_cores[best_cpu] = best_cores[1:]
                continue

        # We can't do an allocation.  If we have all the server
        # allocations, and at least one client core allocated, that'll
        # be good enough.
        if len(servers_allocation) == num_servers and client_allocation:
            break

        # We're going to have to overload cores, if allowed
        if not overload_cores:
            return (None, None)

        hyperthread_side = (hyperthread_side + 1) % len(layout)
        # Copy that entry of the layout
        current_cores = dict(layout[hyperthread_side])

    return (servers_allocation, client_allocation)


if __name__ == "__main__":
    # Usage: [num_servers [cores_per_server]]; defaults are 4 and 1
    num_servers = int(sys.argv[1]) if len(sys.argv) > 1 else 4
    cores_per_server = int(sys.argv[2]) if len(sys.argv) > 2 else 1
    print(core_allocation(num_servers, cores_per_server))