123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- #!/usr/bin/env python3
- import os
- import re
- import subprocess
- import sys
- # Note on hyperthreading: on Xeon CPUs that support SGX2, each physical
- # core supports two hyperthreads (if enabled in the BIOS), and appears
- # as two virtual cores to the system (in /proc/cpuinfo, for example).
- # The two virtual cores that map to the same physical core are called
- # the "A side" and the "B side". Each physical core only has one AES
- # circuit, so if both hyperthreads on the same core are trying to do AES
- # at the same time, performance crashes. So by default, we only use one
- # hyperthread (the "A side") on a given physical core.
- # Get the list of virtual cores currently available to us
def virtual_cores_available():
    """Return the virtual cores this process may currently run on.

    Runs ``numactl -s`` and parses the ``physcpubind:`` line of its
    output.

    Returns:
        A list of virtual core numbers (ints), in the order numactl
        reports them.

    Exits the process with status 1 (after printing a message to
    stderr) if numactl cannot be run, reports failure, or its output
    does not contain a parsable ``physcpubind:`` line.
    """
    try:
        # text=True gives us a decoded str, so the regex sees the real
        # output rather than the repr() of a bytes object.
        ret = subprocess.run(["numactl", "-s"], capture_output=True,
                             text=True)
    except OSError:
        # numactl is not installed or not executable
        ret = None
    if ret is None or ret.returncode != 0:
        print("Unable to run numactl", file=sys.stderr)
        sys.exit(1)
    match = re.search(r'physcpubind: ((\d+ )*\d+)', ret.stdout)
    if match is None:
        print("Could not parse numactl output", file=sys.stderr)
        sys.exit(1)
    return list(map(int, match.group(1).split(' ')))
- # Read /proc/cpuinfo to get a map from virtual core number to CPU number
- # ("physical id") and physical core ("core id"). Only track the cores
- # that are currently available
def get_core_map():
    """Map each available virtual core to its (cpuid, coreid) pair.

    Reads /proc/cpuinfo record by record. Within each record the
    "processor" field gives the virtual core number, "physical id" the
    CPU number and "core id" the physical core on that CPU. Only
    virtual cores reported by virtual_cores_available() are kept.

    Returns:
        A dict mapping virtual core number -> (cpuid, coreid).

    Exits the process with status 1 (after printing diagnostics to
    stderr) if a record is missing any of the three fields.
    """
    # A set makes the per-record membership test O(1)
    cores_available = set(virtual_cores_available())
    coremap = {}

    def store_record(virtcore, cpuid, coreid):
        # Validate one completed record and add it to coremap if that
        # virtual core is available to us.
        if virtcore is None or cpuid is None or coreid is None:
            # Diagnostics go to stderr so they cannot be mistaken for
            # output by whatever reads this script's stdout.
            print(virtcore, cpuid, coreid, file=sys.stderr)
            print("Could not parse /proc/cpuinfo", file=sys.stderr)
            sys.exit(1)
        if virtcore in cores_available:
            coremap[virtcore] = (cpuid, coreid)

    virtcore = None
    cpuid = None
    coreid = None
    with open("/proc/cpuinfo") as p:
        for line in p:
            if line == "\n":
                # A blank line terminates the current record
                store_record(virtcore, cpuid, coreid)
                virtcore = None
                cpuid = None
                coreid = None
            elif match := re.match(r'processor\s*: (\d+)', line):
                virtcore = int(match.group(1))
            elif match := re.match(r'physical id\s*: (\d+)', line):
                cpuid = int(match.group(1))
            elif match := re.match(r'core id\s*: (\d+)', line):
                coreid = int(match.group(1))

    # Don't silently drop a final record that is not followed by a
    # blank line.
    if virtcore is not None or cpuid is not None or coreid is not None:
        store_record(virtcore, cpuid, coreid)

    return coremap
- # Return an array. The first element of the array will represent the "A
- # sides", the second will represent the "B sides" (and if you're on some
- # weird CPU with more than 2 hyperthreads per core, the next will be the
- # "C sides", etc.). Each element will be a map from cpuid to a list of
- # the available virtual cores on that CPU, at most one per physical
- # core.
def get_core_layout():
    """Group the available virtual cores into hyperthread "sides".

    Returns:
        A list of dicts. Entry 0 holds the first virtual core found
        for every (cpuid, physical core) pair, entry 1 the second
        virtual core found for each pair, and so on. Each dict maps a
        cpuid to the list of virtual cores chosen on that CPU for that
        side, at most one per physical core.
    """
    unassigned = get_core_map()
    sides = []
    while unassigned:
        # cpuid -> {coreid -> virtual core picked for this side}
        picked = {}
        # Iterate over a snapshot, since entries are removed as we go
        for vcore, (cpuid, coreid) in list(unassigned.items()):
            per_cpu = picked.setdefault(cpuid, {})
            if coreid in per_cpu:
                # This physical core already has a virtual core on this
                # side; leave this one for a later side.
                continue
            per_cpu[coreid] = vcore
            del unassigned[vcore]
        sides.append({cpuid: list(per_cpu.values())
                      for cpuid, per_cpu in picked.items()})
    return sides
# Hyperthread-side layout of this machine, computed once when the module
# is loaded. NOTE: this runs at import time; get_core_layout() in turn
# invokes numactl and reads /proc/cpuinfo, and exits the process if
# either step fails. core_allocation() reads this module-level value.
core_layout = get_core_layout()
# Maximum number of cores to use for clients (but we'll prefer to use
# fewer cores instead of overloading cores)
CLIENT_MAX_CORES = 8


def core_allocation(num_servers, cores_per_server, *, layout=None):
    """Return a core allocation for an experiment.

    Args:
        num_servers: number of servers to allocate cores for.
        cores_per_server: number of cores each server gets; all of a
            server's cores are taken from the same CPU.
        layout: optional list of hyperthread sides in the format
            returned by get_core_layout() (each side maps a cpuid to a
            list of virtual cores). Defaults to the module-level
            core_layout of this machine.

    Returns:
        A pair (servers_allocation, client_allocation). The first
        element is a list of length num_servers, each element of which
        is the core allocation for one server (a list of length
        cores_per_server). The second element is the core allocation
        for the clients (a list of length between 1 and
        CLIENT_MAX_CORES).

        If the environment variable OVERLOAD_CORES is unset or set to
        0, each available physical core is used at most once; if that
        is not possible, (None, None) is returned. If OVERLOAD_CORES is
        set to 1 (or "t"/"true"), cores are reused when necessary in
        order to run the requested experiment, albeit at a significant
        performance penalty.

        (None, None) is also returned when the layout is empty, or
        when even overloading cannot satisfy the request (for example,
        no CPU has at least cores_per_server cores on any side).
    """
    if layout is None:
        layout = core_layout
    if not layout:
        # No cores at all: nothing can be allocated
        return (None, None)

    overload_cores = \
        os.getenv("OVERLOAD_CORES", '0').lower() in ('true', '1', 't')

    servers_allocation = []
    client_allocation = []

    # Which index into layout we are currently working with
    hyperthread_side = 0
    # Shallow copy of that side; the per-CPU lists are only ever
    # replaced by slices, never mutated, so layout itself is untouched
    current_cores = dict(layout[hyperthread_side])
    # Number of consecutive side switches during which nothing could be
    # allocated; used to detect that no further progress is possible
    idle_rotations = 0

    while len(servers_allocation) < num_servers or \
            len(client_allocation) < CLIENT_MAX_CORES:

        # Find the cpu with the most cores available (the first such
        # cpu wins ties)
        cpu_most_cores = max(
            (cpuid for cpuid, cores in current_cores.items() if cores),
            key=lambda cpuid: len(current_cores[cpuid]),
            default=None)

        if cpu_most_cores is not None:
            available = current_cores[cpu_most_cores]

            # Servers take priority while any are still unallocated
            if len(available) >= cores_per_server and \
                    len(servers_allocation) < num_servers:
                servers_allocation.append(available[:cores_per_server])
                current_cores[cpu_most_cores] = \
                    available[cores_per_server:]
                idle_rotations = 0
                continue

            # We could not find a suitable allocation for the next
            # server. Try allocating a core for clients, if we still
            # could use some.
            if len(client_allocation) < CLIENT_MAX_CORES:
                client_allocation.append(available[0])
                current_cores[cpu_most_cores] = available[1:]
                idle_rotations = 0
                continue

        # We can't do an allocation. If we have all the server
        # allocations, and at least one client core allocated, that'll
        # be good enough.
        if len(servers_allocation) == num_servers and \
                len(client_allocation) >= 1:
            break

        # We're going to have to overload cores, if allowed
        if not overload_cores:
            return (None, None)

        # If a fresh copy of every side has been tried since the last
        # successful allocation, rotating again can never help; give up
        # rather than loop forever.
        if idle_rotations >= len(layout):
            return (None, None)

        hyperthread_side = (hyperthread_side + 1) % len(layout)
        # Copy that entry of the layout
        current_cores = dict(layout[hyperthread_side])
        idle_rotations += 1

    return (servers_allocation, client_allocation)
if __name__ == "__main__":
    # Usage: [num_servers [cores_per_server]]
    # Missing arguments default to 4 servers with 1 core each.
    cli_args = sys.argv[1:]
    num_servers = int(cli_args[0]) if len(cli_args) >= 1 else 4
    cores_per_server = int(cli_args[1]) if len(cli_args) >= 2 else 1
    print(core_allocation(num_servers, cores_per_server))
|