From 9df092fabea63f997e22ecd64f4d5add8e3e3544 Mon Sep 17 00:00:00 2001
From: lubancat
Date: Sat, 9 Apr 2022 16:22:13 +0800
Subject: [PATCH] test: microbench: armv7l

---
 test/microbench/test/armv7l.cc | 333 +++++++++++++++++++++++++++++++++
 1 file changed, 333 insertions(+)
 create mode 100644 test/microbench/test/armv7l.cc

diff --git a/test/microbench/test/armv7l.cc b/test/microbench/test/armv7l.cc
new file mode 100644
index 0000000..f925ade
--- /dev/null
+++ b/test/microbench/test/armv7l.cc
@@ -0,0 +1,333 @@
+// Copyright (C) 2022 Wu Zhangjin , All Rights Reserved.
+//
+// Micro-benchmarks for a few ARM instructions and for a volatile load plus
+// test, optionally with a writer thread storing to the loaded variable.
+//
+// Gcc Inline Assembly: https://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html
+//                      https://www.cristal.univ-lille.fr/~marquet/ens/ctx/doc/l-ia.html
+//                      https://wiki.osdev.org/Inline_assembly
+
+// X86_64 ISA: https://www.aldeid.com/wiki/X86-assembly/Instructions
+// ARMv7 ISA: ARM Architecture Reference Manual, ARMv7-A and ARMv7-R edition (ARM DDI 0406)
+
+#include <pthread.h>
+
+#include "benchmark/benchmark.h"
+
+// OPTIMIZE_LEVEL picks the hook that runs first in every timed iteration:
+// 0 evaluates benchmark::DoNotOptimize(state.iterations()), any other value
+// makes the hook an empty statement.
+#define OPTIMIZE_LEVEL 1
+
+#if defined(OPTIMIZE_LEVEL) && (OPTIMIZE_LEVEL == 0)
+// No trailing semicolon here: every call site already supplies one.
+#define benchmark_DoNotOptimize() benchmark::DoNotOptimize(state.iterations())
+#else
+#define benchmark_DoNotOptimize() do { } while(0)
+#endif
+
+// Flag read by the load benchmarks and, in the threaded variants, written by
+// the writer thread. volatile makes every read in the source a real load.
+volatile int enabled;
+
+// Times one nop per iteration.
+void BM_nop(benchmark::State& state) {
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    asm volatile ("nop":::"memory");
+  }
+}
+BENCHMARK(BM_nop);
+// BENCHMARK(BM_nop)->ThreadPerCpu();
+
+// Times an unconditional branch to the label that immediately follows it.
+void BM_ub(benchmark::State& state) {
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    asm volatile (
+      "1: b 2f \n"
+      "2:"
+      :::"memory");
+  }
+}
+BENCHMARK(BM_ub);
+// BENCHMARK(BM_ub)->ThreadPerCpu();
+
+// Times cmp against zero plus a bne to the next label; x is 1, so the
+// condition holds on every iteration.
+void BM_bnez(benchmark::State& state) {
+  // The "r" constraint already puts x in a register; the register storage
+  // class was removed in C++17 and is not needed.
+  int x = 1;
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    asm volatile (
+      "cmp %0, $0x0 \n"
+      "1: bne 2f \n"
+      "2:"
+      :
+      :"r" (x)
+      :"cc", "memory");  // cmp rewrites the condition flags
+  }
+}
+BENCHMARK(BM_bnez);
+// BENCHMARK(BM_bnez)->ThreadPerCpu();
+
+// Times cmp against zero plus a beq to the next label; x is 0, so the
+// condition holds on every iteration.
+void BM_beqz(benchmark::State& state) {
+  // Plain local for the same reasons as in BM_bnez.
+  int x = 0;
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    asm volatile (
+      "cmp %0, $0x0 \n"
+      "1: beq 2f \n"
+      "2:"
+      :
+      :"r" (x)
+      :"cc", "memory");  // cmp rewrites the condition flags
+  }
+}
+BENCHMARK(BM_beqz);
+// BENCHMARK(BM_beqz)->ThreadPerCpu();
+
+// Times a load of enabled (set to 1 beforehand) and the enabled != 0 test.
+// The guarded statement is an empty asm that emits no instructions.
+void BM_load_bnez(benchmark::State& state) {
+  enabled = 1;
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled != 0)
+      asm volatile ("":::"memory");
+  }
+}
+BENCHMARK(BM_load_bnez);
+// BENCHMARK(BM_load_bnez)->ThreadPerCpu();
+
+// Times a load of enabled (set to 0 beforehand) and the enabled == 0 test.
+void BM_load_beqz(benchmark::State& state) {
+  enabled = 0;
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled == 0)
+      asm volatile ("":::"memory");
+  }
+}
+BENCHMARK(BM_load_beqz);
+// BENCHMARK(BM_load_beqz)->ThreadPerCpu();
+
+// Handshake between a benchmark and its writer thread.
+volatile int thread_start;  // set to 1 by the writer thread when it begins
+volatile int thread_exit;   // set to 1 by the benchmark to stop the writer
+
+// Arguments handed to the writer thread.
+struct th_data {
+  int enabled;  // value of the global enabled when the thread was set up
+  int type;     // writer mode, one of the enumerators below
+};
+// Written before pthread_create() and afterwards only read by the new thread,
+// so it is a plain object; it used to be volatile, but the handler read it
+// through a pointer with the qualifier cast away.
+struct th_data tdata;
+
+// Writer modes. The comments describe what thread_handler() stores.
+enum {
+  CACHE_MISS = 0,         // keeps storing the initial value
+  CACHE_BRANCH_MISS = 1,  // alternates between 1 - initial and initial
+  BRANCH_MISS = 2,        // keeps storing 1 - initial value
+  NO_MISS = 3,            // stores nothing
+};
+
+// Writer thread entry point; data points at a th_data. It raises thread_start
+// and then stores to enabled, as selected by td->type, until thread_exit is
+// non-zero.
+static void *thread_handler (void *data)
+{
+  volatile int *ptr = &enabled;
+  struct th_data *td = static_cast<struct th_data *>(data);
+  long i = td->enabled;
+
+  thread_start = 1;
+
+  while (!thread_exit) {
+    switch (td->type) {
+    case CACHE_MISS:
+      *ptr = i;
+      break;
+    case BRANCH_MISS:
+      *ptr = 1 - i;
+      break;
+    case CACHE_BRANCH_MISS:
+      i = 1 - i;
+      *ptr = i;
+      break;
+    default:
+      break;
+    }
+  }
+
+  return nullptr;
+}
+
+// Sets enabled to initial, clears the handshake flags and starts the writer
+// thread in mode type, returning true once the thread has raised
+// thread_start. Returns false without waiting when pthread_create() fails,
+// because nobody would ever raise thread_start in that case.
+static bool start_writer (pthread_t *th, int initial, int type)
+{
+  enabled = initial;
+  thread_start = 0;
+  thread_exit = 0;
+
+  tdata.enabled = enabled;
+  tdata.type = type;
+
+  if (pthread_create (th, nullptr, thread_handler, &tdata) != 0)
+    return false;
+
+  // Spin until the writer runs so the timed loop never starts without it.
+  while (!thread_start);
+
+  return true;
+}
+
+// Tells the writer thread to leave its loop and waits for it to finish.
+static void stop_writer (pthread_t th)
+{
+  thread_exit = 1;
+  pthread_join (th, nullptr);
+}
+
+// BM_load_bnez loop while a CACHE_MISS writer runs (enabled starts at 1).
+void BM_cache_miss_load_bnez(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 1, CACHE_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled != 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_cache_miss_load_bnez);
+//BENCHMARK(BM_cache_miss_load_bnez)->ThreadRange(1,3);
+//BENCHMARK(BM_cache_miss_load_bnez)->ThreadPerCpu();
+
+// BM_load_beqz loop while a CACHE_MISS writer runs (enabled starts at 0).
+void BM_cache_miss_load_beqz(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 0, CACHE_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled == 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_cache_miss_load_beqz);
+//BENCHMARK(BM_cache_miss_load_beqz)->ThreadRange(1,3);
+//BENCHMARK(BM_cache_miss_load_beqz)->ThreadPerCpu();
+
+// BM_load_bnez loop while a BRANCH_MISS writer runs (enabled starts at 1).
+void BM_branch_miss_load_bnez(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 1, BRANCH_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled != 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_branch_miss_load_bnez);
+//BENCHMARK(BM_branch_miss_load_bnez)->ThreadRange(1,3);
+//BENCHMARK(BM_branch_miss_load_bnez)->ThreadPerCpu();
+
+// BM_load_beqz loop while a BRANCH_MISS writer runs (enabled starts at 0).
+void BM_branch_miss_load_beqz(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 0, BRANCH_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled == 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_branch_miss_load_beqz);
+//BENCHMARK(BM_branch_miss_load_beqz)->ThreadRange(1,3);
+//BENCHMARK(BM_branch_miss_load_beqz)->ThreadPerCpu();
+
+// BM_load_bnez loop while a CACHE_BRANCH_MISS writer runs (enabled starts at 1).
+void BM_cache_branch_miss_load_bnez(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 1, CACHE_BRANCH_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled != 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_cache_branch_miss_load_bnez);
+//BENCHMARK(BM_cache_branch_miss_load_bnez)->ThreadRange(1,3);
+//BENCHMARK(BM_cache_branch_miss_load_bnez)->ThreadPerCpu();
+
+// BM_load_beqz loop while a CACHE_BRANCH_MISS writer runs (enabled starts at 0).
+void BM_cache_branch_miss_load_beqz(benchmark::State& state) {
+  pthread_t th;
+
+  if (!start_writer (&th, 0, CACHE_BRANCH_MISS)) {
+    state.SkipWithError("pthread_create failed");
+    return;
+  }
+
+  for (auto _ : state) {
+    benchmark_DoNotOptimize();
+    if (enabled == 0)
+      asm volatile ("":::"memory");
+  }
+
+  stop_writer (th);
+}
+BENCHMARK(BM_cache_branch_miss_load_beqz);
+//BENCHMARK(BM_cache_branch_miss_load_beqz)->ThreadRange(1,3);
+//BENCHMARK(BM_cache_branch_miss_load_beqz)->ThreadPerCpu();
+
+
+BENCHMARK_MAIN();
-- 
Gitee