summaryrefslogtreecommitdiff
path: root/SpMV
diff options
context:
space:
mode:
author Daniel Friesel <daniel.friesel@uos.de> 2023-06-02 15:29:25 +0200
committer Daniel Friesel <daniel.friesel@uos.de> 2023-06-02 15:29:25 +0200
commit 39d1c972dcea37beca6e20be152a3da78143a7d3 (patch)
tree 7ed0fbde0088a9d330da26db1f6890d903645529 /SpMV
parent ff9304370fdd94e9b7e4c4262c59ac734f1a28fd (diff)
port SpMV to dfatool; add benchmark scripts
Diffstat (limited to 'SpMV')
-rw-r--r-- SpMV/Makefile 63
-rw-r--r-- SpMV/baselines/cpu/Makefile 25
-rw-r--r-- SpMV/baselines/cpu/app.c 19
-rwxr-xr-x SpMV/baselines/cpu/run-opti.sh 15
-rwxr-xr-x SpMV/baselines/cpu/run.sh 25
-rw-r--r-- SpMV/host/app.c 22
-rwxr-xr-x SpMV/run-paper-strong-full.sh 22
-rwxr-xr-x SpMV/run-paper-strong-rank.sh 23
-rwxr-xr-x SpMV/run-paper-weak.sh 28
9 files changed, 198 insertions, 44 deletions
diff --git a/SpMV/Makefile b/SpMV/Makefile
index e517524..0e7a70c 100644
--- a/SpMV/Makefile
+++ b/SpMV/Makefile
@@ -1,30 +1,11 @@
-DPU_DIR := dpu
-HOST_DIR := host
-CPU_BASE_DIR := baselines/cpu
-GPU_BASE_DIR := baselines/gpu
-BUILDDIR ?= bin
NR_TASKLETS ?= 16
NR_DPUS ?= 1
-define conf_filename
- ${BUILDDIR}/.NR_DPUS_$(1)_NR_TASKLETS_$(2).conf
-endef
-CONF := $(call conf_filename,${NR_DPUS},${NR_TASKLETS})
-
-HOST_TARGET := ${BUILDDIR}/host_code
-DPU_TARGET := ${BUILDDIR}/dpu_code
-CPU_BASE_TARGET := ${BUILDDIR}/cpu_baseline
-GPU_BASE_TARGET := ${BUILDDIR}/gpu_baseline
-
COMMON_INCLUDES := support
-HOST_SOURCES := $(wildcard ${HOST_DIR}/*.c)
-DPU_SOURCES := $(wildcard ${DPU_DIR}/*.c)
-CPU_BASE_SOURCES := $(wildcard ${CPU_BASE_DIR}/*.c)
-GPU_BASE_SOURCES := $(wildcard ${GPU_BASE_DIR}/*.cu)
-
-.PHONY: all clean test
-
-__dirs := $(shell mkdir -p ${BUILDDIR})
+HOST_SOURCES := $(wildcard host/*.c)
+DPU_SOURCES := $(wildcard dpu/*.c)
+CPU_BASE_SOURCES := $(wildcard baselines/cpu/*.c)
+GPU_BASE_SOURCES := $(wildcard baselines/gpu/*.cu)
COMMON_FLAGS := -Wall -Wextra -g -I${COMMON_INCLUDES}
HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS}
@@ -32,29 +13,35 @@ DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS}
CPU_BASE_FLAGS := -O3 -fopenmp
GPU_BASE_FLAGS := -O3
-all: ${HOST_TARGET} ${DPU_TARGET} ${CPU_BASE_TARGET}
+QUIET = @
+
+ifdef verbose
+ QUIET =
+endif
-gpu: ${GPU_BASE_TARGET}
+all: bin/host_code bin/dpu_code
-${CONF}:
- $(RM) $(call conf_filename,*,*)
- touch ${CONF}
+bin:
+ ${QUIET}mkdir -p bin
-${HOST_TARGET}: ${HOST_SOURCES} ${COMMON_INCLUDES} ${CONF}
- $(CC) -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
+gpu: bin/gpu_baseline
-${DPU_TARGET}: ${DPU_SOURCES} ${COMMON_INCLUDES} ${CONF}
- dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
+bin/host_code: ${HOST_SOURCES} ${COMMON_INCLUDES} | bin # bin is order-only: it must exist, but its mtime must not force a relink
+	${QUIET}${CC} -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
-${CPU_BASE_TARGET}: ${CPU_BASE_SOURCES}
- $(CC) -o $@ ${CPU_BASE_SOURCES} ${CPU_BASE_FLAGS}
+bin/dpu_code: ${DPU_SOURCES} ${COMMON_INCLUDES} | bin # bin is order-only: it must exist, but its mtime must not force a rebuild
+	${QUIET}dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
-${GPU_BASE_TARGET}: ${GPU_BASE_SOURCES}
- nvcc -o $@ ${GPU_BASE_SOURCES} ${GPU_BASE_FLAGS}
+bin/cpu_baseline: ${CPU_BASE_SOURCES} | bin # order-only: the output directory has to exist before the compiler writes into it
+	${QUIET}${CC} -o $@ ${CPU_BASE_SOURCES} ${CPU_BASE_FLAGS}
+
+bin/gpu_baseline: ${GPU_BASE_SOURCES} | bin # order-only: the output directory has to exist before nvcc writes into it
+	${QUIET}nvcc -o $@ ${GPU_BASE_SOURCES} ${GPU_BASE_FLAGS}
clean:
- $(RM) -r $(BUILDDIR)
+ ${QUIET}rm -rf bin
test: all
- ./${HOST_TARGET}
+ ${QUIET}bin/host_code
+.PHONY: all clean test
diff --git a/SpMV/baselines/cpu/Makefile b/SpMV/baselines/cpu/Makefile
index 9c63605..64b20db 100644
--- a/SpMV/baselines/cpu/Makefile
+++ b/SpMV/baselines/cpu/Makefile
@@ -1,7 +1,24 @@
-all:
- gcc -o spmv -fopenmp app.c
+all: spmv
-clean:
- rm spmv
+spmv: app.c
+ gcc -O2 -o spmv -fopenmp app.c
+
+spmv_O0: app.c
+ gcc -o spmv_O0 -fopenmp app.c
+
+spmv_O2: app.c
+ gcc -O2 -o spmv_O2 -fopenmp app.c
+
+run: spmv
+ OMP_NUM_THREADS=4 ./spmv -f ../../data/bcsstk30.mtx -v 0
+run_O0: spmv_O0
+ OMP_NUM_THREADS=4 ./spmv_O0 -f ../../data/bcsstk30.mtx -v 0
+
+run_O2: spmv_O2
+ OMP_NUM_THREADS=4 ./spmv_O2 -f ../../data/bcsstk30.mtx -v 0
+
+clean:
+ rm -f spmv spmv_O0 spmv_O2
+.PHONY: all run run_O0 run_O2 clean
diff --git a/SpMV/baselines/cpu/app.c b/SpMV/baselines/cpu/app.c
index 46db2f0..8d360ee 100644
--- a/SpMV/baselines/cpu/app.c
+++ b/SpMV/baselines/cpu/app.c
@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
// Calculating result on CPU
PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
- omp_set_num_threads(4);
+ //omp_set_num_threads(4);
Timer timer;
startTimer(&timer);
#pragma omp parallel for
@@ -43,7 +43,22 @@ int main(int argc, char** argv) {
outVector[rowIdx] = sum;
}
stopTimer(&timer);
- if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
+
+
+ unsigned int nr_threads = 0;
+#pragma omp parallel
+#pragma omp atomic
+ nr_threads++;
+
+
+ // coomatrix / csrmatrix use uint32_t indexes and float values
+ printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |"
+ " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n",
+ nr_threads, csrMatrix.numNonzeros,
+ csrMatrix.numNonzeros * sizeof(float) / (getElapsedTime(timer)*1e6),
+ csrMatrix.numNonzeros / (getElapsedTime(timer)*1e6),
+ getElapsedTime(timer)*1e6);
+ //if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
PRINT_INFO(p.verbosity >= 1, " Elapsed time: %f ms", getElapsedTime(timer)*1e3);
// Deallocate data structures
diff --git a/SpMV/baselines/cpu/run-opti.sh b/SpMV/baselines/cpu/run-opti.sh
new file mode 100755
index 0000000..62a3e8b
--- /dev/null
+++ b/SpMV/baselines/cpu/run-opti.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+HOST="$(hostname)"
+
+echo $HOST
+
+make clean
+
+for i in $(seq 1 50); do
+ make run_O0 | sed 's/CPU/CPU O0/'
+done | tee "${HOST}-O0.txt"
+
+for i in $(seq 1 50); do
+ make run_O2 | sed 's/CPU/CPU O2/'
+done | tee "${HOST}-O2.txt"
diff --git a/SpMV/baselines/cpu/run.sh b/SpMV/baselines/cpu/run.sh
new file mode 100755
index 0000000..a993cc0
--- /dev/null
+++ b/SpMV/baselines/cpu/run.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -e
+
+HOST="$(hostname)"
+
+echo $HOST
+
+(
+
+echo "prim-benchmarks SpMV CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# default threads: 4
+
+# input size depends on file -> strong scaling only
+
+make -B
+for i in $(seq 1 50); do
+ for nr_threads in 88 64 44 1 2 4 6 8 12 16 20 24 32; do
+ OMP_NUM_THREADS=${nr_threads} timeout --foreground -k 1m 30m ./spmv -f ../../data/bcsstk30.mtx -v 0 || true
+ done
+done
+) | tee "${HOST}-explore.txt"
diff --git a/SpMV/host/app.c b/SpMV/host/app.c
index 8887410..c1cf92f 100644
--- a/SpMV/host/app.c
+++ b/SpMV/host/app.c
@@ -22,6 +22,9 @@
#define DPU_BINARY "./bin/dpu_code"
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
#ifndef ENERGY
#define ENERGY 0
#endif
@@ -187,14 +190,33 @@ int main(int argc, char** argv) {
// Verify the result
PRINT_INFO(p.verbosity >= 1, "Verifying the result");
+ int status = 1;
for(uint32_t rowIdx = 0; rowIdx < numRows; ++rowIdx) {
float diff = (outVectorReference[rowIdx] - outVector[rowIdx])/outVectorReference[rowIdx];
const float tolerance = 0.00001;
if(diff > tolerance || diff < -tolerance) {
+ status = 0;
PRINT_ERROR("Mismatch at index %u (CPU result = %f, DPU result = %f)", rowIdx, outVectorReference[rowIdx], outVector[rowIdx]);
}
}
+ if (status) {
+ printf("[::] SpMV NMC | n_dpus=%d n_tasklets=%d e_type=%s n_elements=%d "
+ "| throughput_pim_MBps=%f throughput_MBps=%f",
+ // coomatrix / csrmatrix use uint32_t indexes and float values
+ numDPUs, NR_TASKLETS, "float", csrMatrix.numNonzeros,
+ csrMatrix.numNonzeros * sizeof(float) / (dpuTime * 1e6),
+ csrMatrix.numNonzeros * sizeof(uint32_t) / ((loadTime + dpuTime + retrieveTime) * 1e6)
+ );
+ printf(" throughput_pim_MOpps=%f throughput_MOpps=%f",
+ csrMatrix.numNonzeros / (dpuTime * 1e6),
+ csrMatrix.numNonzeros / ((loadTime + dpuTime + retrieveTime) * 1e6)
+ );
+ printf(" timer_load_us=%f timer_dpu_us=%f timer_retrieve_us=%f\n",
+ loadTime * 1e6, dpuTime * 1e6, retrieveTime * 1e6
+ );
+ }
+
// Display DPU Logs
if(p.verbosity >= 2) {
PRINT_INFO(p.verbosity >= 2, "Displaying DPU Logs:");
diff --git a/SpMV/run-paper-strong-full.sh b/SpMV/run-paper-strong-full.sh
new file mode 100755
index 0000000..21f3d25
--- /dev/null
+++ b/SpMV/run-paper-strong-full.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+
+(
+
+echo "prim-benchmarks SpMV strong-full (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 256 512 1024 2048; do
+ for nr_tasklets in 1 2 4 8 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
+ # repetition is not part of upstream setup
+ for i in `seq 1 50`; do
+ timeout --foreground -k 1m 3m bin/host_code -v 0 || true
+ done
+ fi
+ done
+done
+) | tee log-paper-strong-full.txt
diff --git a/SpMV/run-paper-strong-rank.sh b/SpMV/run-paper-strong-rank.sh
new file mode 100755
index 0000000..e0cd45d
--- /dev/null
+++ b/SpMV/run-paper-strong-rank.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+
+(
+
+echo "prim-benchmarks SpMV strong-rank (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# 256 and 512 are not part of upstream
+for nr_dpus in 256 512 1 4 16 64; do
+ for nr_tasklets in 1 2 4 8 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
+ # repetition is not part of upstream setup
+ for i in `seq 1 50`; do
+ timeout --foreground -k 1m 3m bin/host_code -v 0 || true
+ done
+ fi
+ done
+done
+) | tee log-paper-strong-rank.txt
diff --git a/SpMV/run-paper-weak.sh b/SpMV/run-paper-weak.sh
new file mode 100755
index 0000000..d2cc0ed
--- /dev/null
+++ b/SpMV/run-paper-weak.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Per DPU count: run data/generate/replicate on bcsstk30.mtx, then rebuild and run bin/host_code 50x per tasklet count; output is teed to log-paper-weak.txt.
+set -e
+
+(
+
+echo "prim-benchmarks SpMV weak (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# 256 and 512 are not part of upstream
+for nr_dpus in 256 512 1 4 16 64; do
+	cd data/generate
+	make
+	./replicate ../bcsstk30.mtx ${nr_dpus} /tmp/bcsstk30.mtx.${nr_dpus}.mtx
+	cd ../..
+	for nr_tasklets in 1 2 4 8 16; do
+		echo
+		if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
+			# repetition is not part of upstream setup
+			for i in `seq 1 50`; do
+				timeout --foreground -k 1m 3m bin/host_code -v 0 -f /tmp/bcsstk30.mtx.${nr_dpus}.mtx || true
+			done
+		fi
+	done
+	rm -f /tmp/bcsstk30.mtx.${nr_dpus}.mtx
+done
+) | tee log-paper-weak.txt