summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BS/Makefile4
-rw-r--r--BS/baselines/cpu/Makefile2
-rw-r--r--BS/baselines/cpu/bs_omp.c9
-rwxr-xr-xBS/baselines/cpu/run-opti.sh15
-rw-r--r--BS/host/app.c10
-rwxr-xr-xBS/run-paper-strong-full.sh22
-rwxr-xr-xBS/run-paper-strong-rank.sh22
-rwxr-xr-xBS/run-paper-weak.sh23
8 files changed, 92 insertions, 15 deletions
diff --git a/BS/Makefile b/BS/Makefile
index 99dd0f8..f6b5106 100644
--- a/BS/Makefile
+++ b/BS/Makefile
@@ -26,10 +26,10 @@ all: ${HOST_TARGET} ${DPU_TARGET}
bin:
${QUIET}mkdir -p bin
-${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
+${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} bin
${QUIET}${CC} -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
-${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
+${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} bin
${QUIET}dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
clean:
diff --git a/BS/baselines/cpu/Makefile b/BS/baselines/cpu/Makefile
index 1b0ceb5..8faf3c4 100644
--- a/BS/baselines/cpu/Makefile
+++ b/BS/baselines/cpu/Makefile
@@ -10,6 +10,8 @@ bs_omp_O0: bs_omp.c
bs_omp_O2: bs_omp.c
gcc -O2 bs_omp.c -o bs_omp_O2 -fopenmp
+# bs_omp performs a single iteration and must be run in a loop for proper benchmarks
+
.PHONY: run run_O0 run_O2
run: bs_omp
./bs_omp 262144 16777216
diff --git a/BS/baselines/cpu/bs_omp.c b/BS/baselines/cpu/bs_omp.c
index b8b3023..3775bce 100644
--- a/BS/baselines/cpu/bs_omp.c
+++ b/BS/baselines/cpu/bs_omp.c
@@ -100,16 +100,13 @@ uint64_t binarySearch(DTYPE * input, uint64_t input_size, DTYPE* querys, unsigne
int status = (result_host);
if (status) {
- printf("[::] n_threads=%d e_type=%s n_elements=%d "
- "| throughput_cpu_omp_MBps=%f\n",
+ printf("[::] BS CPU | n_threads=%d e_type=%s n_elements=%d "
+ "| throughput_MBps=%f",
nr_threads, "uint64_t", input_size,
n_querys * sizeof(DTYPE) / timer.time[0]);
- printf("[::] n_threads=%d e_type=%s n_elements=%d "
- "| throughput_cpu_omp_MOpps=%f\n",
+ printf(" throughput_MOpps=%f",
nr_threads, "uint64_t", input_size,
n_querys / timer.time[0]);
- printf("[::] n_threads=%d e_type=%s n_elements=%d |",
- nr_threads, "uint64_t", input_size);
printall(&timer, 0);
} else {
printf("[ERROR]\n");
diff --git a/BS/baselines/cpu/run-opti.sh b/BS/baselines/cpu/run-opti.sh
new file mode 100755
index 0000000..62a3e8b
--- /dev/null
+++ b/BS/baselines/cpu/run-opti.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+HOST="$(hostname)"
+
+echo $HOST
+
+make clean
+
+for i in $(seq 1 50); do
+ make run_O0 | sed 's/CPU/CPU O0/'
+done | tee "${HOST}-O0.txt"
+
+for i in $(seq 1 50); do
+ make run_O2 | sed 's/CPU/CPU O2/'
+done | tee "${HOST}-O2.txt"
diff --git a/BS/host/app.c b/BS/host/app.c
index 85587ac..1160163 100644
--- a/BS/host/app.c
+++ b/BS/host/app.c
@@ -214,20 +214,16 @@ int main(int argc, char **argv) {
if (status) {
printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] results are equal\n");
if (rep >= p.n_warmup) {
- printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%lu "
- "| throughput_cpu_MBps=%f throughput_pim_MBps=%f throughput_MBps=%f\n",
+ printf("[::] BS NMC | n_dpus=%d n_tasklets=%d e_type=%s n_elements=%lu "
+ "| throughput_cpu_MBps=%f throughput_pim_MBps=%f throughput_MBps=%f",
nr_of_dpus, NR_TASKLETS, XSTR(DTYPE), input_size,
num_querys * sizeof(DTYPE) / timer.time[0],
num_querys * sizeof(DTYPE) / timer.time[2],
num_querys * sizeof(DTYPE) / (timer.time[1] + timer.time[2] + timer.time[3]));
- printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%lu "
- "| throughput_cpu_MOpps=%f throughput_pim_MOpps=%f throughput_MOpps=%f\n",
- nr_of_dpus, NR_TASKLETS, XSTR(DTYPE), input_size,
+ printf(" throughput_cpu_MOpps=%f throughput_pim_MOpps=%f throughput_MOpps=%f",
num_querys / timer.time[0],
num_querys / timer.time[2],
num_querys / (timer.time[1] + timer.time[2] + timer.time[3]));
- printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%lu |",
- nr_of_dpus, NR_TASKLETS, XSTR(DTYPE), input_size);
printall(&timer, 3);
}
} else {
diff --git a/BS/run-paper-strong-full.sh b/BS/run-paper-strong-full.sh
new file mode 100755
index 0000000..3617ffe
--- /dev/null
+++ b/BS/run-paper-strong-full.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+echo "prim-benchmarks BS strong-full (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 256 512 1024 2048; do
+ for nr_tasklets in 1 2 4 8 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+ timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 16777216 || true
+ fi
+ done
+done | tee log-paper-strong-full.txt
diff --git a/BS/run-paper-strong-rank.sh b/BS/run-paper-strong-rank.sh
new file mode 100755
index 0000000..96403c1
--- /dev/null
+++ b/BS/run-paper-strong-rank.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+echo "prim-benchmarks BS strong-rank (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 1 4 16 64; do
+ for nr_tasklets in 1 2 4 8 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+ timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 262144 || true
+ fi
+ done
+done | tee log-paper-strong-rank.txt
diff --git a/BS/run-paper-weak.sh b/BS/run-paper-weak.sh
new file mode 100755
index 0000000..26f5acf
--- /dev/null
+++ b/BS/run-paper-weak.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+echo "prim-benchmarks BS weak (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 1 4 16 64; do
+ for nr_tasklets in 1 2 4 8 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+ i=$(( nr_dpus * 262144 ))
+ timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i $i || true
+ fi
+ done
+done | tee log-paper-weak.txt