summary | refs | log | tree | commit | diff
path: root/BS
diff options
context:
space:
mode:
author Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-04 08:29:32 +0200
committer Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-04 08:29:32 +0200
commit 6ea34c34074793b4989614207ffe414cd3824e6b (patch)
tree 3d8b79169e03b66fdcfef646eda3424bab1ec3d3 /BS
parent 0be67659d02b62cee3a34c19fa25a758eb2472d1 (diff)
BS: Add NUMA variant for HBM
Diffstat (limited to 'BS')
-rw-r--r-- BS/baselines/cpu/Makefile 9
-rw-r--r-- BS/baselines/cpu/bs_omp.c 82
-rwxr-xr-x BS/baselines/cpu/run.sh 7
-rwxr-xr-x BS/dimes-hetsim.sh 40
4 files changed, 123 insertions, 15 deletions
diff --git a/BS/baselines/cpu/Makefile b/BS/baselines/cpu/Makefile
index 8faf3c4..5c540d7 100644
--- a/BS/baselines/cpu/Makefile
+++ b/BS/baselines/cpu/Makefile
@@ -1,8 +1,15 @@
+NUMA ?= 0
+FLAGS =
+
+ifeq (${NUMA}, 1)
+ FLAGS += -lnuma
+endif
+
.PHONY: all
all: bs_omp
bs_omp: bs_omp.c
- gcc -O2 bs_omp.c -o bs_omp -fopenmp
+ gcc -Wall -Wextra -pedantic -O2 -DNUMA=${NUMA} bs_omp.c -o bs_omp -fopenmp ${FLAGS}
bs_omp_O0: bs_omp.c
gcc bs_omp.c -o bs_omp_O0 -fopenmp
diff --git a/BS/baselines/cpu/bs_omp.c b/BS/baselines/cpu/bs_omp.c
index 3775bce..f160c58 100644
--- a/BS/baselines/cpu/bs_omp.c
+++ b/BS/baselines/cpu/bs_omp.c
@@ -1,4 +1,3 @@
-
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
@@ -10,15 +9,24 @@
#include <stdint.h>
#include "timer.h"
+#if NUMA
+#include <numaif.h>
+#include <numa.h>
+
+void* mp_pages[1];
+int mp_status[1];
+int mp_nodes[1];
+struct bitmask* bitmask_in;
+int numa_node_in = -1;
+int numa_node_cpu = -1;
+#endif
+
#define DTYPE uint64_t
/*
* @brief creates a "test file" by filling a bufferwith values
*/
void create_test_file(DTYPE * input, uint64_t nr_elements, DTYPE * querys, uint64_t n_querys) {
- uint64_t max = UINT64_MAX;
- uint64_t min = 0;
-
srand(time(NULL));
input[0] = 1;
@@ -74,23 +82,63 @@ uint64_t binarySearch(DTYPE * input, uint64_t input_size, DTYPE* querys, unsigne
* @brief Main of the Host Application.
*/
int main(int argc, char **argv) {
-
+ (void)argc;
Timer timer;
uint64_t input_size = atol(argv[1]);
uint64_t n_querys = atol(argv[2]);
+#if NUMA
+ bitmask_in = numa_parse_nodestring(argv[3]);
+ numa_node_cpu = atoi(argv[4]);
+#endif
printf("Vector size: %lu, num searches: %lu\n", input_size, n_querys);
-
+
+#if NUMA
+ if (bitmask_in) {
+ numa_set_membind(bitmask_in);
+ numa_free_nodemask(bitmask_in);
+ }
+ DTYPE * input = numa_alloc((input_size) * sizeof(DTYPE));
+ DTYPE * querys = numa_alloc((n_querys) * sizeof(DTYPE));
+#else
DTYPE * input = malloc((input_size) * sizeof(DTYPE));
DTYPE * querys = malloc((n_querys) * sizeof(DTYPE));
+#endif
+
+#if NUMA
+ struct bitmask *bitmask_all = numa_allocate_nodemask();
+ numa_bitmask_setall(bitmask_all);
+ numa_set_membind(bitmask_all);
+ numa_free_nodemask(bitmask_all);
+#endif
DTYPE result_host = -1;
// Create an input file with arbitrary data.
create_test_file(input, input_size, querys, n_querys);
-
+
+#if NUMA
+ mp_pages[0] = input;
+ if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+ perror("move_pages(A)");
+ }
+ else if (mp_status[0] < 0) {
+ printf("move_pages error: %d", mp_status[0]);
+ }
+ else {
+ numa_node_in = mp_status[0];
+ }
+
+ if (numa_node_cpu != -1) {
+ if (numa_run_on_node(numa_node_cpu) == -1) {
+ perror("numa_run_on_node");
+ numa_node_cpu = -1;
+ }
+ }
+#endif
+
start(&timer, 0, 0);
- result_host = binarySearch(input, input_size - 1, querys, n_querys);
+ result_host = binarySearch(input, input_size - 1, querys, n_querys);
stop(&timer, 0);
unsigned int nr_threads = 0;
@@ -100,18 +148,30 @@ uint64_t binarySearch(DTYPE * input, uint64_t input_size, DTYPE* querys, unsigne
int status = (result_host);
if (status) {
- printf("[::] BS CPU | n_threads=%d e_type=%s n_elements=%d "
- "| throughput_MBps=%f",
+ printf("[::] BS CPU | n_threads=%d e_type=%s n_elements=%lu"
+#if NUMA
+ " numa_node_in=%d numa_node_cpu=%d numa_distance_in_cpu=%d"
+#endif
+ " | throughput_MBps=%f",
nr_threads, "uint64_t", input_size,
+#if NUMA
+ numa_node_in, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu),
+#endif
n_querys * sizeof(DTYPE) / timer.time[0]);
printf(" throughput_MOpps=%f",
- nr_threads, "uint64_t", input_size,
n_querys / timer.time[0]);
printall(&timer, 0);
} else {
printf("[ERROR]\n");
}
+
+#if NUMA
+ numa_free(input, input_size * sizeof(DTYPE));
+ numa_free(querys, n_querys * sizeof(DTYPE));
+#else
free(input);
+ free(querys);
+#endif
return status ? 0 : 1;
diff --git a/BS/baselines/cpu/run.sh b/BS/baselines/cpu/run.sh
index 022ce0e..56f8cb3 100755
--- a/BS/baselines/cpu/run.sh
+++ b/BS/baselines/cpu/run.sh
@@ -4,7 +4,7 @@ set -e
HOST="$(hostname)"
-echo $HOST
+mkdir -p "$HOST"
(
echo "prim-benchmarks BS CPU (dfatool edition)"
@@ -13,7 +13,7 @@ echo "Revision $(git describe --always)"
make -B verbose=1
-for nr_threads in 88 64 44 32 24 20 1 2 4 6 8 12 16; do
+for nr_threads in 128 96 88 64 44 32 24 20 1 2 4 6 8 12 16; do
#for vs in 262144 524288 1048576 2097152; do
# NMC also uses 262144 elements
for vs in 262144; do
@@ -23,4 +23,5 @@ for nr_threads in 88 64 44 32 24 20 1 2 4 6 8 12 16; do
done
done
done
-) | tee "${HOST}-explore.txt"
+echo "Completed at $(date)"
+) | tee "${HOST}/explore.txt"
diff --git a/BS/dimes-hetsim.sh b/BS/dimes-hetsim.sh
new file mode 100755
index 0000000..0ca7481
--- /dev/null
+++ b/BS/dimes-hetsim.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+cd baselines/cpu
+make -B NUMA=1
+
+mkdir -p log/$(hostname)
+fn=log/$(hostname)/$(date +%Y%m%d)-baseline.txt
+
+(
+
+for i in `seq 1 20`; do
+ for nr_threads in 1 2 4 8 12 16; do
+ for cpu in 0 1 2 3 4 5 6 7; do
+ for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ # 2048576 is INPUT_SIZE for DPU version
+ # 2048576 * uint64 ≈ 16 MiB (fits into cache)
+ OMP_NUM_THREADS=$nr_threads ./bs_omp 2048576 16777216 $ram $cpu
+ # 2^27 * uint64 == 1 GiB
+ OMP_NUM_THREADS=$nr_threads ./bs_omp $(perl -E 'say 2 ** 27') 16777216 $ram $cpu
+ # 2^29 * uint64 == 4 GiB
+ OMP_NUM_THREADS=$nr_threads ./bs_omp $(perl -E 'say 2 ** 29') 16777216 $ram $cpu
+ done
+ done
+ done
+ for nr_threads in 32 48 64 96 128; do
+ for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ # 2048576 is INPUT_SIZE for DPU version
+ # 2048576 * uint64 ≈ 16 MiB (fits into cache)
+ OMP_NUM_THREADS=$nr_threads ./bs_omp 2048576 16777216 $ram -1
+ # 2^27 * uint64 == 1 GiB
+ OMP_NUM_THREADS=$nr_threads ./bs_omp $(perl -E 'say 2 ** 27') 16777216 $ram -1
+ # 2^29 * uint64 == 4 GiB
+ OMP_NUM_THREADS=$nr_threads ./bs_omp $(perl -E 'say 2 ** 29') 16777216 $ram -1
+ done
+ done
+done
+
+) | tee $fn
+
+xz -f -v -9 -M 800M $fn