/* * vim: ai ts=4 sts=4 sw=4 cinoptions=>4 expandtab */ /* NOTE(review): the #include directives that follow carry no header names, and several loops and conditions further down stop right where a less-than sign would be. It looks like text between angle brackets was lost when this file was extracted - restore it from the upstream source before building. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #ifdef MULTITHREADED #include #include #endif #ifdef NUMA #include #include #endif /* how many runs to average by default */ #define DEFAULT_NR_LOOPS 40 /* we have 3 tests at the moment */ #define MAX_TESTS 3 /* default block size for test 2, in bytes */ #define DEFAULT_BLOCK_SIZE 262144 /* test types */ #define TEST_MEMCPY 0 #define TEST_DUMB 1 #define TEST_MCBLOCK 2 /* version number */ #define VERSION "1.5+smaug" /* * MBW memory bandwidth benchmark * * 2006, 2012 Andras.Horvath@gmail.com * 2013 j.m.slocum@gmail.com * (Special thanks to Stephen Pasich) * * http://github.com/raas/mbw * * compile with: * gcc -O -o mbw mbw.c * * run with eg.: * * ./mbw 300 * * or './mbw -h' for help * * watch out for swap usage (or turn off swap) */ #ifdef MULTITHREADED unsigned long num_threads = 1; volatile unsigned int done = 0; pthread_t *threads; sem_t start_sem, stop_sem, sync_sem; #endif long *arr_a, *arr_b; /* the two arrays to be copied from/to */ unsigned long long arr_size=0; /* array size (elements in array) */ unsigned int test_type; /* fixed memcpy block size for -t2 */ unsigned long long block_size=DEFAULT_BLOCK_SIZE; #ifdef NUMA void* mp_pages[1]; int mp_status[1]; int mp_nodes[1]; int numa_node_a = -1; int numa_node_b = -1; int numa_node_cpu = -1; struct bitmask* bitmask_a = NULL; struct bitmask* bitmask_b = NULL; #endif /* usage: prints the help text (version banner, option list, short explanation) to stdout with printf. Takes no arguments and returns nothing. */ void usage() { printf("mbw memory benchmark v%s, https://github.com/raas/mbw\n", VERSION); printf("Usage: mbw [options] array_size_in_MiB\n"); printf("Options:\n"); printf(" -n: number of runs per test (0 to run forever)\n"); printf(" -a: Don't display average\n"); printf(" -t%d: memcpy test\n", TEST_MEMCPY); printf(" -t%d: dumb (b[i]=a[i] style) test\n", TEST_DUMB); printf(" -t%d: memcpy test with fixed block size\n", TEST_MCBLOCK); printf(" -b : block size in bytes for -t2 (default: %d)\n", 
DEFAULT_BLOCK_SIZE); printf(" -q: quiet (print statistics only)\n"); /* NOTE(review): the NUMA-only help lines reuse the letters -a and -b that are already listed above, the option parser visible further down handles block size under case B rather than b, and NUME looks like a typo for NUMA - confirm the intended option letters and wording. */ #ifdef NUMA printf(" -a : allocate source array on NUMA node\n"); printf(" -b : allocate target array on NUMA node\n"); printf(" -c : schedule task/threads on NUME node\n"); #endif printf("(will then use two arrays, watch out for swapping)\n"); printf("'Bandwidth' is amount of data copied over the time this operation took.\n"); printf("\nThe default is to run all tests available.\n"); } /* ------------------------------------------------------ */ /* allocate a test array and fill it with data * so as to force Linux to _really_ allocate it */ /* make_array: calloc()s arr_size elements of sizeof(long) bytes each; if that fails it reports through perror() and exits with status 1. NOTE(review): the rest of this function (fill loop, return) and the start of the following copy routine are missing from this text. */ long *make_array() { unsigned long long t; unsigned int long_size=sizeof(long); long *a; a=calloc(arr_size, long_size); if(NULL==a) { perror("Error allocating memory"); exit(1); } /* make sure both arrays are allocated, fill with pattern */ for(t=0; t= block_size; t-=block_size, src+=block_size){ dst=(char *) memcpy(dst, src, block_size) + block_size; } /* whatever is left in t after the block_size-sized copies goes out in one last memcpy */ if(t) { dst=(char *) memcpy(dst, src, t) + t; } } else if(test_type==TEST_DUMB) { /* dumb test */ for(t=dumb_start; t= block_size; t-=block_size, src+=block_size){ dst=(char *) memcpy(dst, src, block_size) + block_size; } if(t) { dst=(char *) memcpy(dst, src, t) + t; } /* in this variant each copy is bracketed by CLOCK_MONOTONIC timestamps (starttime / endtime) */ clock_gettime(CLOCK_MONOTONIC, &endtime); } else if(test_type==TEST_DUMB) { /* dumb test */ clock_gettime(CLOCK_MONOTONIC, &starttime); for(t=0; tMAX_TESTS-1) { printf("Error: test number must be between 0 and %d\n", MAX_TESTS-1); exit(1); } tests[testno]=1; break; /* NOTE(review): block_size is an unsigned long long, so the zero-or-less test in the next case can only ever catch the value 0 */ case 'B': /* block size in bytes*/ block_size=strtoull(optarg, (char **)NULL, 10); if(0>=block_size) { printf("Error: what block size do you mean?\n"); exit(1); } break; case 'q': /* quiet */ quiet=1; break; default: break; } } /* default is to run all tests if no specific tests were requested */ if( (tests[0]+tests[1]+tests[2]) == 0) { tests[0]=1; tests[1]=1; tests[2]=1; } /* nr_loops==0 is only accepted when exactly one test is selected; otherwise print an error and exit(1) */ if( nr_loops==0 && ((tests[0]+tests[1]+tests[2]) != 1) ) { printf("Error: nr_loops can be zero if only one test 
selected!\n"); exit(1); } if(optind=mt) { printf("Error: array size wrong!\n"); exit(1); } /* ------------------------------------------------------ */ long_size=sizeof(long); /* the size of long on this platform */ arr_size=1024*1024/long_size*mt; /* how many longs then in one array? */ /* NOTE(review): this branch is taken when the array (in bytes) is SMALLER than block_size, yet the message says larger - the wording looks inverted; confirm. */ if(arr_size*long_size < block_size) { printf("Error: array size larger than block size (%llu bytes)!\n", block_size); exit(1); } if(!quiet) { printf("Long uses %d bytes. ", long_size); printf("Allocating 2*%lld elements = %lld bytes of memory.\n", arr_size, 2*arr_size*long_size); if(tests[2]) { printf("Using %lld bytes as blocks for memcpy block copy test.\n", block_size); } } /* NUMA builds: numa_set_membind() is called with bitmask_a (when set) before arr_a is allocated and with bitmask_b (when set) before arr_b; each of those masks is freed right after use, and afterwards numa_set_membind() is called with an all-nodes mask, which is then freed too */ #ifdef NUMA struct bitmask *bitmask_all = numa_allocate_nodemask(); numa_bitmask_setall(bitmask_all); if (bitmask_a) { numa_set_membind(bitmask_a); numa_free_nodemask(bitmask_a); } #endif arr_a=make_array(); #ifdef NUMA if (bitmask_b) { numa_set_membind(bitmask_b); numa_free_nodemask(bitmask_b); } #endif arr_b=make_array(); #ifdef NUMA numa_set_membind(bitmask_all); numa_free_nodemask(bitmask_all); #endif /* NUMA builds: move_pages() is called with a NULL nodes argument on the first address of each array; a non-negative mp_status[0] is stored as numa_node_a / numa_node_b, failures are only reported and leave the -1 default in place. NOTE(review): the move_pages error message has no trailing newline - confirm whether that is intended. */ #ifdef NUMA mp_pages[0] = arr_a; if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) { perror("move_pages(arr_a)"); } else if (mp_status[0] < 0) { printf("move_pages error: %d", mp_status[0]); } else { numa_node_a = mp_status[0]; } mp_pages[0] = arr_b; if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) { perror("move_pages(arr_b)"); } else if (mp_status[0] < 0) { printf("move_pages error: %d", mp_status[0]); } else { numa_node_b = mp_status[0]; } /* if a CPU node was requested, try numa_run_on_node(); on failure report it and reset numa_node_cpu to -1 */ if (numa_node_cpu != -1) { if (numa_run_on_node(numa_node_cpu) == -1) { perror("numa_run_on_node"); numa_node_cpu = -1; } } #endif /* ------------------------------------------------------ */ if(!quiet) { printf("Getting down to business... 
Doing %d runs per test.\n", nr_loops); } /* MULTITHREADED builds: initialise the three semaphores with value 0 (err() exits on failure), then create num_threads threads running thread_worker, each receiving its index i cast to a pointer as its argument */ #ifdef MULTITHREADED if (sem_init(&start_sem, 0, 0) != 0) { err(1, "sem_init"); } if (sem_init(&stop_sem, 0, 0) != 0) { err(1, "sem_init"); } if (sem_init(&sync_sem, 0, 0) != 0) { err(1, "sem_init"); } /* NOTE(review): the calloc() result is used by pthread_create() below without a NULL check */ threads = calloc(num_threads, sizeof(pthread_t)); for (i=0; i < num_threads; i++) { if (pthread_create(&threads[i], NULL, thread_worker, (void*)(unsigned long)i) != 0) { err(1, "pthread_create"); } } #endif /* run all tests requested, the proper number of times */ for(test_type=0; test_type