/* * lat_ctx.c - context switch timer * * usage: lat_ctx [-s size] #procs [#procs....] * * Copyright (c) 1994 Larry McVoy. Distributed under the FSF GPL with * additional restriction that results may published only if * (1) the benchmark is unmodified, and * (2) the version in the sccsid below is included in the report. * Support for this development by Sun Microsystems is gratefully acknowledged. */ char *id = "$Id$\n"; #include "bench.h" #if defined(sgi) && defined(PIN) #include #include int ncpus; #endif #define MAXPROC 2048 #define CHUNK (4<<10) #define TRIPS 5 #ifndef max #define max(a, b) ((a) > (b) ? (a) : (b)) #endif int process_size, *data; /* size & pointer to an array that big */ int pids[MAXPROC]; int p[MAXPROC][2]; double pipe_cost(int p[][2], int procs); int ctx(int procs, int nprocs); int sumit(int); void killem(int procs); void doit(int p[MAXPROC][2], int rd, int wr); int create_pipes(int p[][2], int procs); int create_daemons(int p[][2], int pids[], int procs); int main(int ac, char **av) { int i, max_procs; double overhead = 0; if (ac < 2) { usage: printf("Usage: %s [-s kbytes] processes [processes ...]\n", av[0]); exit(1); } /* * Need 4 byte ints. */ if (sizeof(int) != 4) { fprintf(stderr, "Fix sumit() in ctx.c.\n"); exit(1); } /* * If they specified a context size, get it. */ if (!strcmp(av[1], "-s")) { if (ac < 4) { goto usage; } process_size = atoi(av[2]) * 1024; if (process_size > 0) { data = (int *)calloc(1, max(process_size, CHUNK)); BENCHO(sumit(CHUNK), sumit(0), 0); overhead = gettime(); overhead /= get_n(); overhead *= process_size; overhead /= CHUNK; } ac -= 2; av += 2; } #if defined(sgi) && defined(PIN) ncpus = sysmp(MP_NPROCS); sysmp(MP_MUSTRUN, 0); #endif for (max_procs = atoi(av[1]), i = 1; i < ac; ++i) { int procs = atoi(av[i]); if (max_procs < procs) max_procs = procs; } max_procs = create_pipes(p, max_procs); overhead += pipe_cost(p, max_procs); max_procs = create_daemons(p, pids, max_procs); fprintf(stderr, "\n\"size=%dk ovr=%.2f\n", process_size/1024, overhead); for (i = 1; i < ac; ++i) { double time; int procs = atoi(av[i]); if (procs > max_procs) continue; BENCH(ctx(procs, max_procs), 0); time = usecs_spent(); time /= get_n(); time /= procs; time /= TRIPS; time -= overhead; fprintf(stderr, "%d %.2f\n", procs, time); } /* * Close the pipes and kill the children. */ killem(max_procs); for (i = 0; i < max_procs; ++i) { close(p[i][0]); close(p[i][1]); if (i > 0) { wait(0); } } return (0); } int ctx(int procs, int nprocs) { int msg; int i; int sum; /* * Main process - all others should be ready to roll, time the * loop. */ for (i = 0; i < TRIPS; ++i) { if (write(p[nprocs - procs][1], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); exit(1); } if (read(p[nprocs-1][0], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); exit(1); } sum = sumit(process_size); } return (sum); } void killem(int procs) { int i; for (i = 1; i < procs; ++i) { if (pids[i] > 0) { kill(pids[i], SIGTERM); } } } void doit(int p[][2], int rd, int wr) { int msg, sum = 0 /* lint */; signal(SIGTERM, SIG_DFL); if (data) bzero((void*)data, process_size); for ( ;; ) { if (read(p[rd][0], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); break; } sum = sumit(process_size); if (write(p[wr][1], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); break; } } use_int(sum); exit(1); } int doit_cost(int p[][2], int procs) { static int k; int msg = 1; int i; for (i = 0; i < TRIPS; ++i) { if (write(p[k][1], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); exit(1); } if (read(p[k][0], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("read/write on pipe"); exit(1); } if (++k == procs) { k = 0; } } return (msg); } /* * The cost returned is the cost of going through one pipe once in usecs. * No memory costs are included here, this is different than lmbench1. */ double pipe_cost(int p[][2], int procs) { double result; /* * Measure the overhead of passing a byte around the ring. */ BENCH(doit_cost(p, procs), 0); result = usecs_spent(); result /= get_n(); result /= TRIPS; return result; } int create_daemons(int p[][2], int pids[], int procs) { int i, j; int msg; /* * Use the pipes as a ring, and fork off a bunch of processes * to pass the byte through their part of the ring. * * Do the sum in each process and get that time before moving on. */ signal(SIGTERM, SIG_IGN); bzero(pids, procs * sizeof(pid_t)); for (i = 1; i < procs; ++i) { switch (pids[i] = fork()) { case -1: /* could not fork, out of processes? */ procs = i; break; case 0: /* child */ #if defined(sgi) && defined(PIN) sysmp(MP_MUSTRUN, i % ncpus); #endif for (j = 0; j < procs; ++j) { if (j != i-1) close(p[j][0]); if (j != i) close(p[j][1]); } doit(p, i-1, i); /* NOTREACHED */ default: /* parent */ ; } } /* * Go once around the loop to make sure that everyone is ready and * to get the token in the pipeline. */ if (write(p[0][1], &msg, sizeof(msg)) != sizeof(msg) || read(p[procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) { if (errno) perror("write/read/write on pipe"); exit(1); } if (data) bzero((void*)data, process_size); return procs; } int create_pipes(int p[][2], int procs) { int i; /* * Get a bunch of pipes. */ morefds(); for (i = 0; i < procs; ++i) { if (pipe(p[i]) == -1) { return i; } } return procs; } /* * Bring howmuch data into the cache, assuming that the smallest cache * line is 16 bytes. */ int sumit(int howmuch) { int done, sum = 0; register int *d = data; #if 0 #define A sum+=d[0]+d[4]+d[8]+d[12]+d[16]+d[20]+d[24]+d[28]+\ d[32]+d[36]+d[40]+d[44]+d[48]+d[52]+d[56]+d[60]+\ d[64]+d[68]+d[72]+d[76]+d[80]+d[84]+d[88]+d[92]+\ d[96]+d[100]+d[104]+d[108]+d[112]+d[116]+d[120]+d[124];\ d+=128; #define TWOKB A A A A #else #define A sum+=d[0]+d[1]+d[2]+d[3]+d[4]+d[5]+d[6]+d[7]+d[8]+d[9]+\ d[10]+d[11]+d[12]+d[13]+d[14]+d[15]+d[16]+d[17]+d[18]+d[19]+\ d[20]+d[21]+d[22]+d[23]+d[24]+d[25]+d[26]+d[27]+d[28]+d[29]+\ d[30]+d[31]+d[32]+d[33]+d[34]+d[35]+d[36]+d[37]+d[38]+d[39]+\ d[40]+d[41]+d[42]+d[43]+d[44]+d[45]+d[46]+d[47]+d[48]+d[49]+\ d[50]+d[51]+d[52]+d[53]+d[54]+d[55]+d[56]+d[57]+d[58]+d[59]+\ d[60]+d[61]+d[62]+d[63]+d[64]+d[65]+d[66]+d[67]+d[68]+d[69]+\ d[70]+d[71]+d[72]+d[73]+d[74]+d[75]+d[76]+d[77]+d[78]+d[79]+\ d[80]+d[81]+d[82]+d[83]+d[84]+d[85]+d[86]+d[87]+d[88]+d[89]+\ d[90]+d[91]+d[92]+d[93]+d[94]+d[95]+d[96]+d[97]+d[98]+d[99]+\ d[100]+d[101]+d[102]+d[103]+d[104]+\ d[105]+d[106]+d[107]+d[108]+d[109]+\ d[110]+d[111]+d[112]+d[113]+d[114]+\ d[115]+d[116]+d[117]+d[118]+d[119]+\ d[120]+d[121]+d[122]+d[123]+d[124]+d[125]+d[126]+d[127];\ d+=128; /* ints; bytes == 512 */ #define TWOKB A A A A #endif for (done = 0; done < howmuch; done += 2048) { TWOKB } return (sum); }