From a9890714c3d4f703e5625e80a4131ee2599f05b5 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Tue, 13 Apr 2010 17:46:59 +0200 Subject: added tool to demo/evaluate various sync methods This tool is primarily meant as an aid during concept development, NOT as a usable end-user tool to be built (thus it is not inside the build system) --- tools/syncdemo.c | 319 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 319 insertions(+) create mode 100644 tools/syncdemo.c (limited to 'tools/syncdemo.c') diff --git a/tools/syncdemo.c b/tools/syncdemo.c new file mode 100644 index 00000000..1080ee5b --- /dev/null +++ b/tools/syncdemo.c @@ -0,0 +1,319 @@ +/* syncdemo - a program to demonstrate the performance and validity of different + * synchronization methods as well as some timing properties. + * + * The task to be done is very simple: a single global integer is to be incremented + * by multiple threads. All this is done in a very-high concurrency environment. Note that + * the test is unfair to mechanisms like spinlocks, because we have almost only wait + * time but no real processing time between the waits. However, the test provides + * some good insight into atomic instructions vs. other synchronisation methods. + * It also proves that garbling variables by not doing proper synchronisation is + * highly likely. For best results, this program should be executed on a + * multiprocessor machine (on a uniprocessor, it will probably not display the + * problems caused by missing synchronisation). + * + * compile with $ gcc -O0 -o syncdemo -lpthread syncdemo.c + * + * This program REQUIRES linux. With slight modification, it may run on Solaris. + * Note that gcc on Sparc does NOT offer atomic instruction support! + * + * Copyright (C) 2010 by Rainer Gerhards + * Released under the GNU GPLv3.
+ * + * Inspired by (retrieved 2010-04-13) + * http://www.alexonlinux.com/multithreaded-simple-data-type-access-and-atomic-variables + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* config settings */ +static int bCPUAffinity = 0; +static int procs = 0; /* number of processors */ +static int numthrds = 0; /* if zero, => equal num of processors */ +static unsigned goal = 50000000; /* 50 million */ +static int bCVS = 0; /* generate CVS output? */ +static int numIterations = 1; /* number of iterations */ +static int dummyLoad = 0; /* number of dummy load iterations to generate */ +static enum { none, atomic, cas, mutex, spinlock } syncType; + +static int global_int = 0; /* our global counter */ +static unsigned thrd_WorkToDo; /* number of computations each thread must do */ +static volatile int bStartRun = 0; /* indicate to flag when threads should start */ + +static struct timeval tvStart, tvEnd; /* used for timing one testing iteration */ + +/* statistic counters */ +static long long totalRuntime; + +/* sync objects (if needed) */ +static pthread_mutex_t mut; +static pthread_spinlock_t spin; + +static char* +getSyncMethName() +{ + switch(syncType) { + case none : return "none"; + case atomic : return "atomic instruction"; + case mutex : return "mutex"; + case spinlock: return "spin lock"; + case cas : return "cas"; + } +} + + +static pid_t +gettid() +{ + return syscall( __NR_gettid ); +} + + +void *workerThread( void *arg ) +{ + int i, j; + int oldval, newval; /* for CAS sync mode */ + int thrd_num = (int)(long)arg; + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(thrd_num % procs, &set); + + /* if enabled, try to put thread on a fixed CPU (the one that corresponds to the + * thread ID). 
This may + */ + if(bCPUAffinity) { + if (sched_setaffinity( gettid(), sizeof( cpu_set_t ), &set )) { + perror( "sched_setaffinity" ); + return NULL; + } + } + + /* wait for "go" */ + while(bStartRun == 0) + /*WAIT!*/; + + for (i = 0; i < thrd_WorkToDo; i++) { + switch(syncType) { + case none: + global_int++; + break; + case atomic: + __sync_fetch_and_add(&global_int,1); + break; + case cas: + do { + oldval = global_int; + newval = oldval + 1; + } while(!__sync_bool_compare_and_swap(&global_int, oldval, newval)); + break; + case mutex: + pthread_mutex_lock(&mut); + global_int++; + pthread_mutex_unlock(&mut); + break; + case spinlock: + pthread_spin_lock(&spin); + global_int++; + pthread_spin_unlock(&spin); + break; + } + + /* we now generate "dummy load" if instructed to do so. The idea is that + * we do some other work, as in real life, so that we have a better + * ratio of sync vs. actual work to do. + */ + for(j = 0 ; j < dummyLoad ; ++j) { + /* be careful: compiler may optimize loop out! */; + } + } + + return NULL; +} + + +static void beginTiming(void) +{ + if(!bCVS) { + printf("Test Parameters:\n"); + printf("\tNumber of Cores.........: %d\n", procs); + printf("\tNumber of Threads.......: %d\n", numthrds); + printf("\tSet Affinity............: %s\n", bCPUAffinity ? 
"yes" : "no"); + printf("\tCount to................: %u\n", goal); + printf("\tWork for each Thread....: %u\n", thrd_WorkToDo); + printf("\tDummy Load Counter......: %d\n", dummyLoad); + printf("\tSync Method used........: %s\n", getSyncMethName()); + } + gettimeofday(&tvStart, NULL); +} + + +static void endTiming(void) +{ + long sec, usec; + + gettimeofday(&tvEnd, NULL); + if(tvStart.tv_usec > tvEnd.tv_usec) { + tvEnd.tv_sec--; + tvEnd.tv_usec += 1000000; + } + + sec = tvEnd.tv_sec - tvStart.tv_sec; + usec = tvEnd.tv_usec - tvStart.tv_usec; + + if(bCVS) { + printf("%s,%d,%d,%d,%u,%ld.%ld\n", + getSyncMethName(), procs, numthrds, bCPUAffinity, goal, sec, usec); + } else { + printf("measured (sytem time) runtime is %ld.%ld seconds\n", sec, usec); + } + totalRuntime += sec * 1000 + (usec / 1000); +} + + +static void +usage(void) +{ + fprintf(stderr, "Usage: syncdemo -a -c -t\n"); + fprintf(stderr, "\t-a set CPU affinity\n"); + fprintf(stderr, "\t-c count to \n"); + fprintf(stderr, "\t-d dummy load, iterations\n"); + fprintf(stderr, "\t-t number of threads to use\n"); + fprintf(stderr, "\t-s sync-type to use (none, atomic, mutex, spin)\n"); + fprintf(stderr, "\t-C generate CVS output\n"); + fprintf(stderr, "\t-I number of iterations\n"); + exit(2); +} + + +/* carry out the actual test (one iteration) + */ +static void +singleTest(void) +{ + int i; + unsigned delta; + pthread_t *thrs; + + global_int = 0; + bStartRun = 0; + + thrs = malloc(sizeof(pthread_t) * numthrds); + if (thrs == NULL) { + perror( "malloc" ); + exit(1); + } + + thrd_WorkToDo = goal / numthrds; + + for (i = 0; i < numthrds; i++) { + if(pthread_create( &thrs[i], NULL, workerThread, (void *)(long)i )) { + perror( "pthread_create" ); + procs = i; + break; + } + } + + beginTiming(); + bStartRun = 1; /* start the threads (they are busy-waiting so far!) 
*/ + + for (i = 0; i < numthrds; i++) + pthread_join( thrs[i], NULL ); + + endTiming(); + + free( thrs ); + + delta = thrd_WorkToDo * numthrds - global_int; + if(!bCVS) { + if(delta == 0) { + printf("Computation was done correctly.\n"); + } else { + printf("Computation INCORRECT,\n" + "\texpected %9u\n" + "\treal %9u\n" + "\toff by %9u\n", + thrd_WorkToDo * numthrds, + global_int, + delta); + } + } +} + + +int +main(int argc, char *argv[]) +{ + int i; + int opt; + + while((opt = getopt(argc, argv, "ac:d:i:t:s:C")) != EOF) { + switch((char)opt) { + case 'a': + bCPUAffinity = 1; + break; + case 'c': + goal = (unsigned) atol(optarg); + break; + case 'd': + dummyLoad = atoi(optarg); + break; + case 'i': + numIterations = atoi(optarg); + break; + case 't': + numthrds = atoi(optarg); + break; + case 'C': + bCVS = 1; + break; + case 's': + if(!strcmp(optarg, "none")) + syncType = none; + else if(!strcmp(optarg, "atomic")) + syncType = atomic; + else if(!strcmp(optarg, "cas")) + syncType = cas; + else if(!strcmp(optarg, "mutex")) { + syncType = mutex; + pthread_mutex_init(&mut, NULL); + } else if(!strcmp(optarg, "spin")) { + syncType = spinlock; + pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); + } else { + fprintf(stderr, "error: invalid sync mode '%s'\n", optarg); + usage(); + } + break; + default:usage(); + break; + } + } + + /* Getting number of CPUs */ + procs = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (procs < 0) { + perror( "sysconf" ); + return -1; + } + + if(numthrds < 1) { + numthrds = procs; + } + + totalRuntime = 0; + for(i = 0 ; i < numIterations ; ++i) { + singleTest(); + } + + printf("total runtime %ld, avg %ld\n", totalRuntime, totalRuntime / numIterations); + return 0; +} -- cgit v1.2.3 From f7f81e89d514b4ff7763285a8932d580c032995c Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 14 Apr 2010 07:20:28 +0200 Subject: (minor) some improvements to syncdemo --- tools/syncdemo.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 
insertions(+), 21 deletions(-) (limited to 'tools/syncdemo.c') diff --git a/tools/syncdemo.c b/tools/syncdemo.c index 1080ee5b..b4b75cdc 100644 --- a/tools/syncdemo.c +++ b/tools/syncdemo.c @@ -39,7 +39,7 @@ static int bCPUAffinity = 0; static int procs = 0; /* number of processors */ static int numthrds = 0; /* if zero, => equal num of processors */ static unsigned goal = 50000000; /* 50 million */ -static int bCVS = 0; /* generate CVS output? */ +static int bCSV = 0; /* generate CVS output? */ static int numIterations = 1; /* number of iterations */ static int dummyLoad = 0; /* number of dummy load iterations to generate */ static enum { none, atomic, cas, mutex, spinlock } syncType; @@ -142,7 +142,7 @@ void *workerThread( void *arg ) static void beginTiming(void) { - if(!bCVS) { + if(!bCSV) { printf("Test Parameters:\n"); printf("\tNumber of Cores.........: %d\n", procs); printf("\tNumber of Threads.......: %d\n", numthrds); @@ -158,6 +158,7 @@ static void beginTiming(void) static void endTiming(void) { + unsigned delta; long sec, usec; gettimeofday(&tvEnd, NULL); @@ -169,12 +170,25 @@ static void endTiming(void) sec = tvEnd.tv_sec - tvStart.tv_sec; usec = tvEnd.tv_usec - tvStart.tv_usec; - if(bCVS) { - printf("%s,%d,%d,%d,%u,%ld.%ld\n", - getSyncMethName(), procs, numthrds, bCPUAffinity, goal, sec, usec); + delta = thrd_WorkToDo * numthrds - global_int; + if(bCSV) { + printf("%s,%d,%d,%d,%u,%u,%ld.%ld\n", + getSyncMethName(), procs, numthrds, bCPUAffinity, goal, delta, sec, usec); } else { printf("measured (sytem time) runtime is %ld.%ld seconds\n", sec, usec); + if(delta == 0) { + printf("Computation was done correctly.\n"); + } else { + printf("Computation INCORRECT,\n" + "\texpected %9u\n" + "\treal %9u\n" + "\toff by %9u\n", + thrd_WorkToDo * numthrds, + global_int, + delta); + } } + totalRuntime += sec * 1000 + (usec / 1000); } @@ -200,7 +214,6 @@ static void singleTest(void) { int i; - unsigned delta; pthread_t *thrs; global_int = 0; @@ -232,20 +245,6 @@ 
singleTest(void) free( thrs ); - delta = thrd_WorkToDo * numthrds - global_int; - if(!bCVS) { - if(delta == 0) { - printf("Computation was done correctly.\n"); - } else { - printf("Computation INCORRECT,\n" - "\texpected %9u\n" - "\treal %9u\n" - "\toff by %9u\n", - thrd_WorkToDo * numthrds, - global_int, - delta); - } - } } @@ -273,7 +272,7 @@ main(int argc, char *argv[]) numthrds = atoi(optarg); break; case 'C': - bCVS = 1; + bCSV = 1; break; case 's': if(!strcmp(optarg, "none")) -- cgit v1.2.3 From fd05cbf25617fdd5216c23158d7919ee0607aa2d Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Sun, 2 May 2010 12:34:56 +0200 Subject: enhanced tool to test timing of sync methods --- tools/syncdemo.c | 122 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 29 deletions(-) (limited to 'tools/syncdemo.c') diff --git a/tools/syncdemo.c b/tools/syncdemo.c index b4b75cdc..41a65bcf 100644 --- a/tools/syncdemo.c +++ b/tools/syncdemo.c @@ -34,15 +34,20 @@ #include #include + +typedef enum { none, atomic, cas, mutex, spinlock } syncType_t; +static syncType_t syncTypes[] = { none, atomic, cas, mutex, spinlock }; + /* config settings */ static int bCPUAffinity = 0; static int procs = 0; /* number of processors */ static int numthrds = 0; /* if zero, => equal num of processors */ static unsigned goal = 50000000; /* 50 million */ -static int bCSV = 0; /* generate CVS output? */ +static int bCSV = 0; /* generate CSV output? 
*/ static int numIterations = 1; /* number of iterations */ static int dummyLoad = 0; /* number of dummy load iterations to generate */ -static enum { none, atomic, cas, mutex, spinlock } syncType; +syncType_t syncType; +static int bAllSyncTypes = 0; static int global_int = 0; /* our global counter */ static unsigned thrd_WorkToDo; /* number of computations each thread must do */ @@ -52,17 +57,19 @@ static struct timeval tvStart, tvEnd; /* used for timing one testing iteration * /* statistic counters */ static long long totalRuntime; +static unsigned minRuntime = 999999999; +static unsigned maxRuntime = 0; /* sync objects (if needed) */ static pthread_mutex_t mut; static pthread_spinlock_t spin; static char* -getSyncMethName() +getSyncMethName(syncType_t st) { - switch(syncType) { + switch(st) { case none : return "none"; - case atomic : return "atomic instruction"; + case atomic : return "atomic op"; case mutex : return "mutex"; case spinlock: return "spin lock"; case cas : return "cas"; @@ -142,7 +149,7 @@ void *workerThread( void *arg ) static void beginTiming(void) { - if(!bCSV) { + if(!(bCSV || bAllSyncTypes)) { printf("Test Parameters:\n"); printf("\tNumber of Cores.........: %d\n", procs); printf("\tNumber of Threads.......: %d\n", numthrds); @@ -150,7 +157,7 @@ static void beginTiming(void) printf("\tCount to................: %u\n", goal); printf("\tWork for each Thread....: %u\n", thrd_WorkToDo); printf("\tDummy Load Counter......: %d\n", dummyLoad); - printf("\tSync Method used........: %s\n", getSyncMethName()); + printf("\tSync Method used........: %s\n", getSyncMethName(syncType)); } gettimeofday(&tvStart, NULL); } @@ -160,6 +167,7 @@ static void endTiming(void) { unsigned delta; long sec, usec; + long runtime; gettimeofday(&tvEnd, NULL); if(tvStart.tv_usec > tvEnd.tv_usec) { @@ -171,25 +179,32 @@ static void endTiming(void) usec = tvEnd.tv_usec - tvStart.tv_usec; delta = thrd_WorkToDo * numthrds - global_int; - if(bCSV) { - 
printf("%s,%d,%d,%d,%u,%u,%ld.%ld\n", - getSyncMethName(), procs, numthrds, bCPUAffinity, goal, delta, sec, usec); - } else { - printf("measured (sytem time) runtime is %ld.%ld seconds\n", sec, usec); - if(delta == 0) { - printf("Computation was done correctly.\n"); + if(!bAllSyncTypes) { + if(bCSV) { + printf("%s,%d,%d,%d,%u,%u,%ld.%06.6ld\n", + getSyncMethName(syncType), procs, numthrds, bCPUAffinity, goal, delta, sec, usec); } else { - printf("Computation INCORRECT,\n" - "\texpected %9u\n" - "\treal %9u\n" - "\toff by %9u\n", - thrd_WorkToDo * numthrds, - global_int, - delta); + printf("measured (sytem time) runtime is %ld.%06.6ld seconds\n", sec, usec); + if(delta == 0) { + printf("Computation was done correctly.\n"); + } else { + printf("Computation INCORRECT,\n" + "\texpected %9u\n" + "\treal %9u\n" + "\toff by %9u\n", + thrd_WorkToDo * numthrds, + global_int, + delta); + } } } - totalRuntime += sec * 1000 + (usec / 1000); + runtime = sec * 1000 + (usec / 1000); + totalRuntime += runtime; + if(runtime < minRuntime) + minRuntime = runtime; + if(runtime > maxRuntime) + maxRuntime = runtime; } @@ -198,12 +213,13 @@ usage(void) { fprintf(stderr, "Usage: syncdemo -a -c -t\n"); fprintf(stderr, "\t-a set CPU affinity\n"); + fprintf(stderr, "\t-i number of iterations\n"); fprintf(stderr, "\t-c count to \n"); fprintf(stderr, "\t-d dummy load, iterations\n"); fprintf(stderr, "\t-t number of threads to use\n"); fprintf(stderr, "\t-s sync-type to use (none, atomic, mutex, spin)\n"); - fprintf(stderr, "\t-C generate CVS output\n"); - fprintf(stderr, "\t-I number of iterations\n"); + fprintf(stderr, "\t-C generate CSV output\n"); + fprintf(stderr, "\t-A test ALL sync types\n"); exit(2); } @@ -248,14 +264,57 @@ singleTest(void) } +/* display an unsigned ms runtime count as string. Note that the + * string is inside a dynamically allocated buffer, which the caller + * must free to prevent a memory leak. 
+ */ +char * +dispRuntime(unsigned rt) +{ + static char *fmtbuf; + + fmtbuf = malloc(32 * sizeof(char)); + snprintf(fmtbuf, 32, "%u.%03.3u", + rt / 1000, rt % 1000); + return(fmtbuf); +} + + +doTest(syncType_t st) +{ + int i; + + syncType = st; + totalRuntime = 0; + minRuntime = 999999999; + maxRuntime = 0; + for(i = 0 ; i < numIterations ; ++i) { + //printf("starting iteration %d\n", i); + singleTest(); + } + + /* we have a memory leak due to calling dispRuntime(), but we don't + * care as we terminate immediately. + */ + printf("%9s: total runtime %8.8ld, avg %s, min %s, max %s\n", + getSyncMethName(st), (long)totalRuntime, + dispRuntime((unsigned) (totalRuntime / numIterations)), + dispRuntime(minRuntime), + dispRuntime(maxRuntime)); +} + + int main(int argc, char *argv[]) { int i; int opt; - while((opt = getopt(argc, argv, "ac:d:i:t:s:C")) != EOF) { + while((opt = getopt(argc, argv, "ac:d:i:t:s:CA")) != EOF) { switch((char)opt) { + case 'A': + bAllSyncTypes = 1; + break; case 'a': bCPUAffinity = 1; break; @@ -308,11 +367,16 @@ main(int argc, char *argv[]) numthrds = procs; } - totalRuntime = 0; - for(i = 0 ; i < numIterations ; ++i) { - singleTest(); + if(bAllSyncTypes) { + pthread_mutex_init(&mut, NULL); + pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); + for(i = 0 ; i < sizeof(syncTypes) / sizeof(syncType_t) ; ++i) { + doTest(syncTypes[i]); + } + printf("done running tests\n"); + } else { + doTest(syncType); } - printf("total runtime %ld, avg %ld\n", totalRuntime, totalRuntime / numIterations); return 0; } -- cgit v1.2.3 From 3b0a3cb9840f46c41ccf339e0ace99646cf2d32a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Sun, 2 May 2010 17:21:51 +0200 Subject: added semaphores to sync test scenario --- tools/syncdemo.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'tools/syncdemo.c') diff --git a/tools/syncdemo.c b/tools/syncdemo.c index 41a65bcf..a9c394f6 100644 --- a/tools/syncdemo.c +++ 
b/tools/syncdemo.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,8 @@ #include -typedef enum { none, atomic, cas, mutex, spinlock } syncType_t; -static syncType_t syncTypes[] = { none, atomic, cas, mutex, spinlock }; +typedef enum { none, atomic, cas, spinlock, mutex, semaphore } syncType_t; +static syncType_t syncTypes[] = { none, atomic, cas, spinlock, mutex, semaphore }; /* config settings */ static int bCPUAffinity = 0; @@ -63,16 +64,18 @@ static unsigned maxRuntime = 0; /* sync objects (if needed) */ static pthread_mutex_t mut; static pthread_spinlock_t spin; +static sem_t sem; static char* getSyncMethName(syncType_t st) { switch(st) { - case none : return "none"; - case atomic : return "atomic op"; - case mutex : return "mutex"; - case spinlock: return "spin lock"; - case cas : return "cas"; + case none : return "none"; + case atomic : return "atomic op"; + case spinlock : return "spin lock"; + case mutex : return "mutex"; + case semaphore: return "semaphore"; + case cas : return "cas"; } } @@ -132,6 +135,11 @@ void *workerThread( void *arg ) global_int++; pthread_spin_unlock(&spin); break; + case semaphore: + sem_wait(&sem); + global_int++; + sem_post(&sem); + break; } /* we now generate "dummy load" if instructed to do so. 
The idea is that @@ -184,7 +192,7 @@ static void endTiming(void) printf("%s,%d,%d,%d,%u,%u,%ld.%06.6ld\n", getSyncMethName(syncType), procs, numthrds, bCPUAffinity, goal, delta, sec, usec); } else { - printf("measured (sytem time) runtime is %ld.%06.6ld seconds\n", sec, usec); + printf("measured (sytem time) runtime is %ld.% 6.6ld seconds\n", sec, usec); if(delta == 0) { printf("Computation was done correctly.\n"); } else { @@ -217,7 +225,7 @@ usage(void) fprintf(stderr, "\t-c count to \n"); fprintf(stderr, "\t-d dummy load, iterations\n"); fprintf(stderr, "\t-t number of threads to use\n"); - fprintf(stderr, "\t-s sync-type to use (none, atomic, mutex, spin)\n"); + fprintf(stderr, "\t-s sync-type to use (none, atomic, mutex, spin, semaphore)\n"); fprintf(stderr, "\t-C generate CSV output\n"); fprintf(stderr, "\t-A test ALL sync types\n"); exit(2); @@ -296,8 +304,9 @@ doTest(syncType_t st) /* we have a memory leak due to calling dispRuntime(), but we don't * care as we terminate immediately. 
*/ - printf("%9s: total runtime %8.8ld, avg %s, min %s, max %s\n", - getSyncMethName(st), (long)totalRuntime, + printf("%-10s: total runtime %6ld.%3.3u, avg %s, min %s, max %s\n", + getSyncMethName(st), + (long)totalRuntime/1000, (unsigned)(totalRuntime % 1000), dispRuntime((unsigned) (totalRuntime / numIterations)), dispRuntime(minRuntime), dispRuntime(maxRuntime)); @@ -346,6 +355,9 @@ main(int argc, char *argv[]) } else if(!strcmp(optarg, "spin")) { syncType = spinlock; pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); + } else if(!strcmp(optarg, "semaphore")) { + syncType = semaphore; + sem_init(&sem, 0, 1); } else { fprintf(stderr, "error: invalid sync mode '%s'\n", optarg); usage(); @@ -370,10 +382,18 @@ main(int argc, char *argv[]) if(bAllSyncTypes) { pthread_mutex_init(&mut, NULL); pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); + sem_init(&sem, 0, 1); for(i = 0 ; i < sizeof(syncTypes) / sizeof(syncType_t) ; ++i) { doTest(syncTypes[i]); } - printf("done running tests\n"); + printf("Done running tests, result based on:\n"); + printf("\tNumber of Cores.........: %d\n", procs); + printf("\tNumber of Threads.......: %d\n", numthrds); + printf("\tSet CPU Affinity........: %s\n", bCPUAffinity ? "yes" : "no"); + printf("\tCount to................: %u\n", goal); + printf("\tWork for each Thread....: %u\n", thrd_WorkToDo); + printf("\tDummy Load Counter......: %d\n", dummyLoad); + printf("\tIterations..............: %d\n", numIterations); } else { doTest(syncType); } -- cgit v1.2.3 From 5ab54ee0fb7ff106d057381b73e3af8c58ebaed0 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 5 May 2010 17:47:57 +0200 Subject: improved syncdemo tool - added "partition" mode... ... and also analyzed the resulting assembly code to find out weak spots for the test. Explanations and new suggestions added. 
--- tools/syncdemo.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 9 deletions(-) (limited to 'tools/syncdemo.c') diff --git a/tools/syncdemo.c b/tools/syncdemo.c index a9c394f6..89a5c6cc 100644 --- a/tools/syncdemo.c +++ b/tools/syncdemo.c @@ -11,7 +11,25 @@ * multiprocessor machine (on a uniprocessor, it will probably not display the * problems caused by missing synchronisation). * - * compile with $ gcc -O0 -o syncdemo -lpthread syncdemo.c + * Note: partitioned processing mode means that all computation is first done + * locally and the final result is then combined doing proper synchronization. + * This mode is used as a baseline for uninterrupted processing. + * + * compile with $ gcc -O1 -o syncdemo -lpthread syncdemo.c + * + * Alternatively, you may use -O0, but not a higher level. Note that + * the gcc code generator does in neither case generate code really + * suitable to compare "part" and "none" modes. If you absolutely need + * to do that, you need to use inline assembly. However, the results should + * be fairly OK when consistently using either -O0 or -O1. If you see a big loss + * of performance when you compare "none" and "part", be sure to run + * "none" with -t1 and watch out for the results! In any case, looking at the generated + * assembly code is vital to interpret results correctly. Review of generated assembly + * done on 2010-05-05 indicates that -O0 is probably the best choice. Note that we + * use the volatile attribute in one spot. This is used because it results in the + * best comparable result for our gcc 4.4.3, not really to invoke the volatile semantics. + * + * use "gcc -g -Wa,-ahl=syncdemo.s -lpthread syncdemo.c" to obtain a mixed code/assembly listing. * * This program REQUIRES linux. With slight modification, it may run on Solaris. * Note that gcc on Sparc does NOT offer atomic instruction support!
@@ -36,8 +54,8 @@ #include -typedef enum { none, atomic, cas, spinlock, mutex, semaphore } syncType_t; -static syncType_t syncTypes[] = { none, atomic, cas, spinlock, mutex, semaphore }; +typedef enum { part, none, atomic, cas, spinlock, mutex, semaphore } syncType_t; +static syncType_t syncTypes[] = { part, none, atomic, cas, spinlock, mutex, semaphore }; /* config settings */ static int bCPUAffinity = 0; @@ -70,6 +88,7 @@ static char* getSyncMethName(syncType_t st) { switch(st) { + case part : return "partition"; case none : return "none"; case atomic : return "atomic op"; case spinlock : return "spin lock"; @@ -90,12 +109,17 @@ gettid() void *workerThread( void *arg ) { int i, j; + volatile int partval = 0; /* use volatile so that gcc generates code similar to global var */ + int *partptr; int oldval, newval; /* for CAS sync mode */ int thrd_num = (int)(long)arg; cpu_set_t set; CPU_ZERO(&set); CPU_SET(thrd_num % procs, &set); + if(syncType == part) { + partval = 0; + } /* if enabled, try to put thread on a fixed CPU (the one that corresponds to the * thread ID). This may @@ -113,6 +137,11 @@ void *workerThread( void *arg ) for (i = 0; i < thrd_WorkToDo; i++) { switch(syncType) { + case part: + ///* one needs to use inline assembly to get this right... 
*/ + //asm("addl $1, global_int(%rip)"); + partval++; + break; case none: global_int++; break; @@ -151,6 +180,12 @@ void *workerThread( void *arg ) } } + if(syncType == part) { + pthread_mutex_lock(&mut); + global_int += partval; + pthread_mutex_unlock(&mut); + } + return NULL; } @@ -345,6 +380,8 @@ main(int argc, char *argv[]) case 's': if(!strcmp(optarg, "none")) syncType = none; + else if(!strcmp(optarg, "part")) + syncType = part; else if(!strcmp(optarg, "atomic")) syncType = atomic; else if(!strcmp(optarg, "cas")) @@ -354,7 +391,6 @@ main(int argc, char *argv[]) pthread_mutex_init(&mut, NULL); } else if(!strcmp(optarg, "spin")) { syncType = spinlock; - pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); } else if(!strcmp(optarg, "semaphore")) { syncType = semaphore; sem_init(&sem, 0, 1); @@ -368,10 +404,15 @@ main(int argc, char *argv[]) } } + /* for simplicity, we init all sync helpers no matter if we need them */ + pthread_mutex_init(&mut, NULL); + pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); + sem_init(&sem, 0, 1); + /* Getting number of CPUs */ procs = (int)sysconf(_SC_NPROCESSORS_ONLN); - if (procs < 0) { - perror( "sysconf" ); + if(procs < 0) { + perror("sysconf"); return -1; } @@ -380,9 +421,6 @@ main(int argc, char *argv[]) } if(bAllSyncTypes) { - pthread_mutex_init(&mut, NULL); - pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE); - sem_init(&sem, 0, 1); for(i = 0 ; i < sizeof(syncTypes) / sizeof(syncType_t) ; ++i) { doTest(syncTypes[i]); } -- cgit v1.2.3