@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+
+import os
+import re
+import subprocess
+import sys
+
+# Note on hyperthreading: on Xeon CPUs that support SGX2, each physical
+# core supports two hyperthreads (if enabled in the BIOS), and appears
+# as two virtual cores to the system (in /proc/cpuinfo, for example).
+# The two virtual cores that map to the same physical core are called
+# the "A side" and the "B side". Each physical core has only one AES
+# circuit, so if both hyperthreads on the same core try to do AES at
+# the same time, performance crashes. So by default, we use only one
+# hyperthread (the "A side") on a given physical core.
+
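+# For example (the numbering here is hypothetical; the real mapping
+# comes from /proc/cpuinfo), a single 4-core CPU with hyperthreading
+# enabled might enumerate virtual cores 0-3 as the "A sides" of
+# physical cores 0-3, and virtual cores 4-7 as the matching "B sides":
+# virtual cores 0 and 4 would then share one physical core (and its
+# single AES circuit).
+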
+# Get the list of virtual cores currently available to us
+def virtual_cores_available():
+    ret = subprocess.run(["numactl", "-s"], capture_output=True, text=True)
+    if ret.returncode != 0:
+        print("Unable to run numactl", file=sys.stderr)
+        sys.exit(1)
+    match = re.search(r'physcpubind: ((\d+ )*\d+)', ret.stdout)
+    if match is None:
+        print("Could not parse numactl output", file=sys.stderr)
+        sys.exit(1)
+    return list(map(int, match.group(1).split(' ')))
+
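+# A sketch of the expected numactl output (the values are hypothetical;
+# the "physcpubind:" line is what the regex above matches):
+#   $ numactl -s
+#   policy: default
+#   ...
+#   physcpubind: 0 1 2 3 4 5 6 7
+# for which virtual_cores_available() returns [0, 1, 2, 3, 4, 5, 6, 7].
+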
+# Read /proc/cpuinfo to get a map from virtual core number to CPU number
+# ("physical id") and physical core ("core id"). Only track the cores
+# that are currently available.
+def get_core_map():
+    cores_available = virtual_cores_available()
+    coremap = {}
+
+    with open("/proc/cpuinfo") as p:
+        virtcore = None
+        cpuid = None
+        coreid = None
+        while True:
+            l = p.readline()
+            if l == "":
+                break
+            elif l == "\n":
+                if virtcore is None or cpuid is None or coreid is None:
+                    print("Could not parse /proc/cpuinfo", file=sys.stderr)
+                    sys.exit(1)
+                if virtcore in cores_available:
+                    coremap[virtcore] = (cpuid, coreid)
+                virtcore = None
+                cpuid = None
+                coreid = None
+            elif match := re.match(r'processor\s*: (\d+)', l):
+                virtcore = int(match.group(1))
+            elif match := re.match(r'physical id\s*: (\d+)', l):
+                cpuid = int(match.group(1))
+            elif match := re.match(r'core id\s*: (\d+)', l):
+                coreid = int(match.group(1))
+    return coremap
+
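+# For illustration, /proc/cpuinfo contains one block of fields per
+# virtual core, like the following (values hypothetical):
+#   processor       : 4
+#   physical id     : 0
+#   core id         : 0
+# which get_core_map() would record as coremap[4] = (0, 0), i.e.,
+# virtual core 4 is a hyperthread of physical core 0 on CPU 0.
+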
+# Return an array. The first element of the array will represent the "A
+# sides", the second will represent the "B sides" (and if you're on some
+# weird CPU with more than 2 hyperthreads per core, the next will be the
+# "C sides", etc.). Each element will be a map from cpuid to a list of
+# the available virtual cores on that CPU, at most one per physical
+# core.
+def get_core_layout():
+    core_map = get_core_map()
+    retarray = []
+    while core_map:
+        # Extract the first virtual core for each (cpuid, physical core)
+        # from core_map
+        current_side_map = {}
+        virtual_cores_remaining = list(core_map.keys())
+        for vcore in virtual_cores_remaining:
+            (cpuid, coreid) = core_map[vcore]
+            if cpuid not in current_side_map:
+                current_side_map[cpuid] = {}
+            if coreid not in current_side_map[cpuid]:
+                current_side_map[cpuid][coreid] = vcore
+                del core_map[vcore]
+
+        current_side = {}
+        for cpuid in current_side_map:
+            current_side[cpuid] = list(current_side_map[cpuid].values())
+        retarray.append(current_side)
+    return retarray
+
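+# Under the hypothetical enumeration described above (one 4-core CPU
+# with hyperthreading, virtual cores 0-3 the "A sides" and 4-7 the
+# "B sides"), get_core_layout() would return:
+#   [{0: [0, 1, 2, 3]}, {0: [4, 5, 6, 7]}]
+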
+core_layout = get_core_layout()
+
+# Maximum number of cores to use for clients (but we'll prefer to use
+# fewer cores instead of overloading cores)
+CLIENT_MAX_CORES = 8
+
+# Return a core allocation for an experiment. Pass in the number of
+# servers and the number of cores per server. The return value is a
+# pair. The first element is a list of length num_servers, each
+# element of which is the core allocation for one server (which will be
+# a list of length cores_per_server). The second element of the return
+# value is the core allocation for the clients (which will be a list of
+# length between 1 and CLIENT_MAX_CORES). If the environment variable
+# OVERLOAD_CORES is unset or set to 0, each available physical core will
+# be used at most once. If that is not possible, (None, None) will be
+# returned. If OVERLOAD_CORES is set to 1, then physical cores will be
+# reused when necessary in order to run the requested experiment, albeit
+# at a significant performance penalty. In any event, you must have at
+# least one CPU with at least cores_per_server physical cores.
+def core_allocation(num_servers, cores_per_server):
+    overload_cores = \
+        os.getenv("OVERLOAD_CORES", '0').lower() in ('true', '1', 't')
+
+    servers_allocation = []
+    client_allocation = []
+
+    # Which index into core_layout we are currently working with
+    hyperthread_side = 0
+
+    # Copy that entry of the core_layout
+    current_cores = dict(core_layout[hyperthread_side])
+
+    while len(servers_allocation) < num_servers or \
+            len(client_allocation) < CLIENT_MAX_CORES:
+        # Find the CPU with the most cores available
+        cpu_most_cores = None
+        num_most_cores = None
+        for cpuid in current_cores:
+            num_cores = len(current_cores[cpuid])
+            if num_cores > 0 and \
+                    (num_most_cores is None or num_cores > num_most_cores):
+                cpu_most_cores = cpuid
+                num_most_cores = num_cores
+
+        if num_most_cores is not None and \
+                num_most_cores >= cores_per_server and \
+                len(servers_allocation) < num_servers:
+            servers_allocation.append(
+                current_cores[cpu_most_cores][0:cores_per_server])
+            current_cores[cpu_most_cores] = \
+                current_cores[cpu_most_cores][cores_per_server:]
+            continue
+
+        # We could not find a suitable allocation for the next server.
+        # Try allocating a core for clients, if we still could use some.
+        if num_most_cores is not None and \
+                num_most_cores >= 1 and \
+                len(client_allocation) < CLIENT_MAX_CORES:
+            client_allocation.append(current_cores[cpu_most_cores][0])
+            current_cores[cpu_most_cores] = \
+                current_cores[cpu_most_cores][1:]
+            continue
+
+        # We can't do an allocation. If we have all the server
+        # allocations, and at least one client core allocated, that'll
+        # be good enough.
+        if len(servers_allocation) == num_servers and \
+                len(client_allocation) >= 1:
+            break
+
+        # We're going to have to overload cores, if allowed
+        if not overload_cores:
+            return (None, None)
+
+        hyperthread_side = (hyperthread_side + 1) % len(core_layout)
+        # Copy that entry of the core_layout
+        current_cores = dict(core_layout[hyperthread_side])
+
+    return (servers_allocation, client_allocation)
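+
+# A sketch of the intended use (core numbering is hypothetical, for a
+# machine with two 8-core CPUs and hyperthreading disabled):
+#   core_allocation(2, 4)
+#   => ([[0, 1, 2, 3], [8, 9, 10, 11]], [4, 12, 5, 13, 6, 14, 7, 15])
+# Each server gets cores_per_server cores on a single CPU; the client
+# cores then alternate between the CPUs, because the allocator always
+# draws from the CPU with the most free cores remaining.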
+
+
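+# Command-line usage (the script name here is hypothetical): pass
+# num_servers and cores_per_server as arguments, e.g.
+#   $ ./core_allocation.py 2 4
+# and set OVERLOAD_CORES=1 to permit reusing physical cores:
+#   $ OVERLOAD_CORES=1 ./core_allocation.py 16 4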
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        num_servers = int(sys.argv[1])
+    else:
+        num_servers = 4
+    if len(sys.argv) > 2:
+        cores_per_server = int(sys.argv[2])
+    else:
+        cores_per_server = 1
+    print(core_allocation(num_servers, cores_per_server))