703 lines
14 KiB
C
703 lines
14 KiB
C
|
|
/*
|
||
|
|
* Microbenchmark for math functions.
|
||
|
|
*
|
||
|
|
* Copyright (c) 2018-2022, Arm Limited.
|
||
|
|
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
|
||
|
|
*/
|
||
|
|
|
||
|
|
#undef _GNU_SOURCE
|
||
|
|
#define _GNU_SOURCE 1
|
||
|
|
#include <stdint.h>
|
||
|
|
#include <stdlib.h>
|
||
|
|
#include <stdio.h>
|
||
|
|
#include <string.h>
|
||
|
|
#include <time.h>
|
||
|
|
#include <math.h>
|
||
|
|
#include "mathlib.h"
|
||
|
|
|
||
|
|
/* Build the vector math benchmarks unless the build already decided.  */
#if !defined (WANT_VMATH)
# define WANT_VMATH 1
#endif

/* Default number of timed measurements; the best one is reported.  */
#define MEASURE 60
/* Number of elements in the input arrays.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Run-time copies of the defaults above; main () overwrites them from the
   -m and -c options.  */
static long measurecount = MEASURE;
static long itercount = ITER;

/* Input samples handed to the double and float benchmarks respectively.  */
static double A[N];
static float Af[N];

/* Samples loaded by readtrace () and the number of entries allocated for
   them; both stay zero until a trace is read.  */
static double *Trace;
static size_t trace_size;
|
||
|
|
|
||
|
|
#if __aarch64__ && WANT_VMATH
/* Fixed-width vector types: two double lanes or four float lanes.  */
typedef __f64x2_t v_double;
typedef __f32x4_t v_float;

#define v_double_len() 2
#define v_float_len() 4

/* Build a vector from the two consecutive doubles starting at P.  */
static inline v_double
v_double_load (const double *p)
{
  v_double v = {p[0], p[1]};
  return v;
}

/* Build a vector with X in both lanes.  */
static inline v_double
v_double_dup (double x)
{
  v_double v = {x, x};
  return v;
}

/* Build a vector from the four consecutive floats starting at P.  */
static inline v_float
v_float_load (const float *p)
{
  v_float v = {p[0], p[1], p[2], p[3]};
  return v;
}

/* Build a vector with X in all four lanes.  */
static inline v_float
v_float_dup (float x)
{
  v_float v = {x, x, x, x};
  return v;
}

#if WANT_SVE_MATH
#include <arm_sve.h>
/* Scalable vector types; the lane count is queried at run time.  */
typedef svbool_t sv_bool;
typedef svfloat64_t sv_double;
typedef svfloat32_t sv_float;

#define sv_double_len() svcntd()
#define sv_float_len() svcntw()

/* Load one full vector of doubles from P with an all-true predicate.  */
static inline sv_double
sv_double_load (const double *p)
{
  return svld1 (svptrue_b64 (), p);
}

/* Broadcast X to every double lane.  */
static inline sv_double
sv_double_dup (double x)
{
  sv_double v = svdup_n_f64 (x);
  return v;
}

/* Load one full vector of floats from P with an all-true predicate.  */
static inline sv_float
sv_float_load (const float *p)
{
  return svld1 (svptrue_b32 (), p);
}

/* Broadcast X to every float lane.  */
static inline sv_float
sv_float_dup (float x)
{
  sv_float v = svdup_n_f32 (x);
  return v;
}
#endif
#else
/* Scalar stand-ins so the vector benchmark code still compiles: every
   "vector" is a single element.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_float_len(x) 1
#define v_double_load(x) (x)[0]
#define v_float_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_dup(x) (x)
#endif
|
||
|
|
|
||
|
|
/* Identity function in double precision.  It is registered in funtab as
   the "dummy" entry, so it is timed like any other benchmarked function.  */
static double
dummy (double x)
{
  double same = x;
  return same;
}
|
||
|
|
|
||
|
|
/* Identity function in single precision.  It is registered in funtab as
   the "dummyf" entry, so it is timed like any other benchmarked function.  */
static float
dummyf (float x)
{
  float same = x;
  return same;
}
|
||
|
|
#if WANT_VMATH
#if __aarch64__
/* Identity functions for each vector flavour.  They are registered in
   funtab next to the scalar dummies and simply hand their argument back.  */

/* Vector of doubles, default calling convention.  */
static v_double
__v_dummy (v_double x)
{
  v_double same = x;
  return same;
}

/* Vector of floats, default calling convention.  */
static v_float
__v_dummyf (v_float x)
{
  v_float same = x;
  return same;
}

#ifdef __vpcs
/* Vector of doubles, declared with the __vpcs attribute.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  v_double same = x;
  return same;
}

/* Vector of floats, declared with the __vpcs attribute.  */
__vpcs static v_float
__vn_dummyf (v_float x)
{
  v_float same = x;
  return same;
}
#endif
#if WANT_SVE_MATH
/* Scalable vector of doubles; the predicate PG is ignored.  */
static sv_double
__sv_dummy (sv_double x, sv_bool pg)
{
  (void) pg;
  return x;
}

/* Scalable vector of floats; the predicate PG is ignored.  */
static sv_float
__sv_dummyf (sv_float x, sv_bool pg)
{
  (void) pg;
  return x;
}

#endif
#endif
#endif
|
||
|
|
|
||
|
|
#include "test/mathbench_wrappers.h"
|
||
|
|
|
||
|
|
static const struct fun
|
||
|
|
{
|
||
|
|
const char *name;
|
||
|
|
int prec;
|
||
|
|
int vec;
|
||
|
|
double lo;
|
||
|
|
double hi;
|
||
|
|
union
|
||
|
|
{
|
||
|
|
double (*d) (double);
|
||
|
|
float (*f) (float);
|
||
|
|
v_double (*vd) (v_double);
|
||
|
|
v_float (*vf) (v_float);
|
||
|
|
#ifdef __vpcs
|
||
|
|
__vpcs v_double (*vnd) (v_double);
|
||
|
|
__vpcs v_float (*vnf) (v_float);
|
||
|
|
#endif
|
||
|
|
#if WANT_SVE_MATH
|
||
|
|
sv_double (*svd) (sv_double, sv_bool);
|
||
|
|
sv_float (*svf) (sv_float, sv_bool);
|
||
|
|
#endif
|
||
|
|
} fun;
|
||
|
|
} funtab[] = {
|
||
|
|
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
|
||
|
|
#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
|
||
|
|
#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
|
||
|
|
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
|
||
|
|
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
|
||
|
|
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
|
||
|
|
#define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}},
|
||
|
|
#define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
|
||
|
|
D (dummy, 1.0, 2.0)
|
||
|
|
F (dummyf, 1.0, 2.0)
|
||
|
|
#if WANT_VMATH
|
||
|
|
#if __aarch64__
|
||
|
|
VD (__v_dummy, 1.0, 2.0)
|
||
|
|
VF (__v_dummyf, 1.0, 2.0)
|
||
|
|
#ifdef __vpcs
|
||
|
|
VND (__vn_dummy, 1.0, 2.0)
|
||
|
|
VNF (__vn_dummyf, 1.0, 2.0)
|
||
|
|
#endif
|
||
|
|
#if WANT_SVE_MATH
|
||
|
|
SVD (__sv_dummy, 1.0, 2.0)
|
||
|
|
SVF (__sv_dummyf, 1.0, 2.0)
|
||
|
|
#endif
|
||
|
|
#endif
|
||
|
|
#endif
|
||
|
|
#include "test/mathbench_funcs.h"
|
||
|
|
{0},
|
||
|
|
#undef F
|
||
|
|
#undef D
|
||
|
|
#undef VF
|
||
|
|
#undef VD
|
||
|
|
#undef VNF
|
||
|
|
#undef VND
|
||
|
|
#undef SVF
|
||
|
|
#undef SVD
|
||
|
|
};
|
||
|
|
|
||
|
|
static void
|
||
|
|
gen_linear (double lo, double hi)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
A[i] = (lo * (N - i) + hi * i) / N;
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
genf_linear (double lo, double hi)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
Af[i] = (float)(lo * (N - i) + hi * i) / N;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Reinterpret the 64 bits of I as a double without any numeric
   conversion.  */
static inline double
asdouble (uint64_t i)
{
  double d;
  memcpy (&d, &i, sizeof d);
  return d;
}
|
||
|
|
|
||
|
|
/* State of the pseudo-random generator used by frand ().  The fixed start
   value makes every run produce the same sequence.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double in [LO, HI).  The state is advanced
   with a 64-bit linear congruential step; its top 52 bits become the
   mantissa of a double in [1, 2), which is then shifted to [0, 1) and
   scaled to the requested interval.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double unit;

  seed = seed * 6364136223846793005ULL + 1;
  bits = (seed >> 12) | (0x3ffULL << 52);
  unit = asdouble (bits) - 1.0;
  return lo + (hi - lo) * unit;
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
gen_rand (double lo, double hi)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
A[i] = frand (lo, hi);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
genf_rand (double lo, double hi)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
Af[i] = (float)frand (lo, hi);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
gen_trace (int index)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
A[i] = Trace[index + i];
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
genf_trace (int index)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
Af[i] = (float)Trace[index + i];
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
run_thruput (double f (double))
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
f (A[i]);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
runf_thruput (float f (float))
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
f (Af[i]);
|
||
|
|
}
|
||
|
|
|
||
|
|
volatile double zero = 0;
|
||
|
|
|
||
|
|
static void
|
||
|
|
run_latency (double f (double))
|
||
|
|
{
|
||
|
|
double z = zero;
|
||
|
|
double prev = z;
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
prev = f (A[i] + prev * z);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
runf_latency (float f (float))
|
||
|
|
{
|
||
|
|
float z = (float)zero;
|
||
|
|
float prev = z;
|
||
|
|
for (int i = 0; i < N; i++)
|
||
|
|
prev = f (Af[i] + prev * z);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
run_v_thruput (v_double f (v_double))
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i += v_double_len ())
|
||
|
|
f (v_double_load (A+i));
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
runf_v_thruput (v_float f (v_float))
|
||
|
|
{
|
||
|
|
for (int i = 0; i < N; i += v_float_len ())
|
||
|
|
f (v_float_load (Af+i));
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
run_v_latency (v_double f (v_double))
|
||
|
|
{
|
||
|
|
v_double z = v_double_dup (zero);
|
||
|
|
v_double prev = z;
|
||
|
|
for (int i = 0; i < N; i += v_double_len ())
|
||
|
|
prev = f (v_double_load (A+i) + prev * z);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
runf_v_latency (v_float f (v_float))
|
||
|
|
{
|
||
|
|
v_float z = v_float_dup (zero);
|
||
|
|
v_float prev = z;
|
||
|
|
for (int i = 0; i < N; i += v_float_len ())
|
||
|
|
prev = f (v_float_load (Af+i) + prev * z);
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef __vpcs
/* Runners for functions declared with the __vpcs attribute.  They mirror
   the plain vector runners and differ only in the type of F.  */

/* Call F on consecutive v_double_len ()-sized groups of A and discard the
   results.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  int pos = 0;
  while (pos < N)
    {
      f (v_double_load (A + pos));
      pos += v_double_len ();
    }
}

/* Call F on consecutive v_float_len ()-sized groups of Af and discard the
   results.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  int pos = 0;
  while (pos < N)
    {
      f (v_float_load (Af + pos));
      pos += v_float_len ();
    }
}

/* Call F on consecutive groups of A, with each input offset by the
   previous result times a run-time zero so the calls form a chain.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  const v_double z = v_double_dup (zero);
  v_double prev = z;
  int pos = 0;

  while (pos < N)
    {
      prev = f (v_double_load (A + pos) + prev * z);
      pos += v_double_len ();
    }
}

/* Call F on consecutive groups of Af, with each input offset by the
   previous result times a run-time zero so the calls form a chain.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  const v_float z = v_float_dup (zero);
  v_float prev = z;
  int pos = 0;

  while (pos < N)
    {
      prev = f (v_float_load (Af + pos) + prev * z);
      pos += v_float_len ();
    }
}
#endif
|
||
|
|
|
||
|
|
#if WANT_SVE_MATH
/* Runners for SVE functions.  Every call gets an all-true predicate and
   the step through the array is the run-time vector length.  */

/* Call F on consecutive full vectors of A and discard the results.  */
static void
run_sv_thruput (sv_double f (sv_double, sv_bool))
{
  int pos = 0;
  while (pos < N)
    {
      f (sv_double_load (A + pos), svptrue_b64 ());
      pos += sv_double_len ();
    }
}

/* Call F on consecutive full vectors of Af and discard the results.  */
static void
runf_sv_thruput (sv_float f (sv_float, sv_bool))
{
  int pos = 0;
  while (pos < N)
    {
      f (sv_float_load (Af + pos), svptrue_b32 ());
      pos += sv_float_len ();
    }
}

/* Call F on consecutive full vectors of A.  Each argument is computed as
   prev * z + input with z a run-time zero, so the calls form a chain.  */
static void
run_sv_latency (sv_double f (sv_double, sv_bool))
{
  sv_double z = sv_double_dup (zero);
  sv_double prev = z;
  int pos = 0;

  while (pos < N)
    {
      sv_double arg
	= svmad_f64_x (svptrue_b64 (), prev, z, sv_double_load (A + pos));
      prev = f (arg, svptrue_b64 ());
      pos += sv_double_len ();
    }
}

/* Call F on consecutive full vectors of Af.  Each argument is computed as
   prev * z + input with z a run-time zero, so the calls form a chain.  */
static void
runf_sv_latency (sv_float f (sv_float, sv_bool))
{
  sv_float z = sv_float_dup (zero);
  sv_float prev = z;
  int pos = 0;

  while (pos < N)
    {
      sv_float arg
	= svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af + pos));
      prev = f (arg, svptrue_b32 ());
      pos += sv_float_len ();
    }
}
#endif
|
||
|
|
|
||
|
|
static uint64_t
|
||
|
|
tic (void)
|
||
|
|
{
|
||
|
|
struct timespec ts;
|
||
|
|
if (clock_gettime (CLOCK_REALTIME, &ts))
|
||
|
|
abort ();
|
||
|
|
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Time RUN (F) and leave the best result in the caller's variable `dt'
   (an unsigned 64-bit nanosecond count, which must be in scope).

   After one untimed warm-up call, measurecount measurements are taken;
   each one times itercount back-to-back calls of RUN (F) with tic ().  The
   smallest elapsed time is kept.  dt starts at the largest representable
   value so the first measurement always replaces it.

   The loop counters are long to match measurecount and itercount.  The
   locals carry a timeit_ prefix so they cannot capture names used in the
   RUN or F arguments.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (long timeit_m = 0; timeit_m < measurecount; timeit_m++) \
    { \
      uint64_t timeit_t0 = tic (); \
      for (long timeit_i = 0; timeit_i < itercount; timeit_i++) \
	run (f); \
      uint64_t timeit_t1 = tic (); \
      if (timeit_t1 - timeit_t0 < dt) \
	dt = timeit_t1 - timeit_t0; \
    } \
} while (0)
|
||
|
|
|
||
|
|
static void
|
||
|
|
bench1 (const struct fun *f, int type, double lo, double hi)
|
||
|
|
{
|
||
|
|
uint64_t dt = 0;
|
||
|
|
uint64_t ns100;
|
||
|
|
const char *s = type == 't' ? "rthruput" : "latency";
|
||
|
|
int vlen = 1;
|
||
|
|
|
||
|
|
if (f->vec && f->prec == 'd')
|
||
|
|
vlen = v_double_len();
|
||
|
|
else if (f->vec && f->prec == 'f')
|
||
|
|
vlen = v_float_len();
|
||
|
|
|
||
|
|
if (f->prec == 'd' && type == 't' && f->vec == 0)
|
||
|
|
TIMEIT (run_thruput, f->fun.d);
|
||
|
|
else if (f->prec == 'd' && type == 'l' && f->vec == 0)
|
||
|
|
TIMEIT (run_latency, f->fun.d);
|
||
|
|
else if (f->prec == 'f' && type == 't' && f->vec == 0)
|
||
|
|
TIMEIT (runf_thruput, f->fun.f);
|
||
|
|
else if (f->prec == 'f' && type == 'l' && f->vec == 0)
|
||
|
|
TIMEIT (runf_latency, f->fun.f);
|
||
|
|
else if (f->prec == 'd' && type == 't' && f->vec == 'v')
|
||
|
|
TIMEIT (run_v_thruput, f->fun.vd);
|
||
|
|
else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
|
||
|
|
TIMEIT (run_v_latency, f->fun.vd);
|
||
|
|
else if (f->prec == 'f' && type == 't' && f->vec == 'v')
|
||
|
|
TIMEIT (runf_v_thruput, f->fun.vf);
|
||
|
|
else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
|
||
|
|
TIMEIT (runf_v_latency, f->fun.vf);
|
||
|
|
#ifdef __vpcs
|
||
|
|
else if (f->prec == 'd' && type == 't' && f->vec == 'n')
|
||
|
|
TIMEIT (run_vn_thruput, f->fun.vnd);
|
||
|
|
else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
|
||
|
|
TIMEIT (run_vn_latency, f->fun.vnd);
|
||
|
|
else if (f->prec == 'f' && type == 't' && f->vec == 'n')
|
||
|
|
TIMEIT (runf_vn_thruput, f->fun.vnf);
|
||
|
|
else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
|
||
|
|
TIMEIT (runf_vn_latency, f->fun.vnf);
|
||
|
|
#endif
|
||
|
|
#if WANT_SVE_MATH
|
||
|
|
else if (f->prec == 'd' && type == 't' && f->vec == 's')
|
||
|
|
TIMEIT (run_sv_thruput, f->fun.svd);
|
||
|
|
else if (f->prec == 'd' && type == 'l' && f->vec == 's')
|
||
|
|
TIMEIT (run_sv_latency, f->fun.svd);
|
||
|
|
else if (f->prec == 'f' && type == 't' && f->vec == 's')
|
||
|
|
TIMEIT (runf_sv_thruput, f->fun.svf);
|
||
|
|
else if (f->prec == 'f' && type == 'l' && f->vec == 's')
|
||
|
|
TIMEIT (runf_sv_latency, f->fun.svf);
|
||
|
|
#endif
|
||
|
|
|
||
|
|
if (type == 't')
|
||
|
|
{
|
||
|
|
ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
|
||
|
|
printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
|
||
|
|
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
|
||
|
|
(unsigned long long) dt, lo, hi);
|
||
|
|
}
|
||
|
|
else if (type == 'l')
|
||
|
|
{
|
||
|
|
ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
|
||
|
|
printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
|
||
|
|
(unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
|
||
|
|
(unsigned long long) dt, lo, hi);
|
||
|
|
}
|
||
|
|
fflush (stdout);
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
bench (const struct fun *f, double lo, double hi, int type, int gen)
|
||
|
|
{
|
||
|
|
if (f->prec == 'd' && gen == 'r')
|
||
|
|
gen_rand (lo, hi);
|
||
|
|
else if (f->prec == 'd' && gen == 'l')
|
||
|
|
gen_linear (lo, hi);
|
||
|
|
else if (f->prec == 'd' && gen == 't')
|
||
|
|
gen_trace (0);
|
||
|
|
else if (f->prec == 'f' && gen == 'r')
|
||
|
|
genf_rand (lo, hi);
|
||
|
|
else if (f->prec == 'f' && gen == 'l')
|
||
|
|
genf_linear (lo, hi);
|
||
|
|
else if (f->prec == 'f' && gen == 't')
|
||
|
|
genf_trace (0);
|
||
|
|
|
||
|
|
if (gen == 't')
|
||
|
|
hi = trace_size / N;
|
||
|
|
|
||
|
|
if (type == 'b' || type == 't')
|
||
|
|
bench1 (f, 't', lo, hi);
|
||
|
|
|
||
|
|
if (type == 'b' || type == 'l')
|
||
|
|
bench1 (f, 'l', lo, hi);
|
||
|
|
|
||
|
|
for (int i = N; i < trace_size; i += N)
|
||
|
|
{
|
||
|
|
if (f->prec == 'd')
|
||
|
|
gen_trace (i);
|
||
|
|
else
|
||
|
|
genf_trace (i);
|
||
|
|
|
||
|
|
lo = i / N;
|
||
|
|
if (type == 'b' || type == 't')
|
||
|
|
bench1 (f, 't', lo, hi);
|
||
|
|
|
||
|
|
if (type == 'b' || type == 'l')
|
||
|
|
bench1 (f, 'l', lo, hi);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
readtrace (const char *name)
|
||
|
|
{
|
||
|
|
int n = 0;
|
||
|
|
FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
|
||
|
|
if (!f)
|
||
|
|
{
|
||
|
|
printf ("openning \"%s\" failed: %m\n", name);
|
||
|
|
exit (1);
|
||
|
|
}
|
||
|
|
for (;;)
|
||
|
|
{
|
||
|
|
if (n >= trace_size)
|
||
|
|
{
|
||
|
|
trace_size += N;
|
||
|
|
Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
|
||
|
|
if (Trace == NULL)
|
||
|
|
{
|
||
|
|
printf ("out of memory\n");
|
||
|
|
exit (1);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if (fscanf (f, "%lf", Trace + n) != 1)
|
||
|
|
break;
|
||
|
|
n++;
|
||
|
|
}
|
||
|
|
if (ferror (f) || n == 0)
|
||
|
|
{
|
||
|
|
printf ("reading \"%s\" failed: %m\n", name);
|
||
|
|
exit (1);
|
||
|
|
}
|
||
|
|
fclose (f);
|
||
|
|
if (n % N == 0)
|
||
|
|
trace_size = n;
|
||
|
|
for (int i = 0; n < trace_size; n++, i++)
|
||
|
|
Trace[n] = Trace[i];
|
||
|
|
}
|
||
|
|
|
||
|
|
static void
|
||
|
|
usage (void)
|
||
|
|
{
|
||
|
|
printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
|
||
|
|
"[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
|
||
|
|
"[func2 ..]\n");
|
||
|
|
printf ("func:\n");
|
||
|
|
printf ("%7s [run all benchmarks]\n", "all");
|
||
|
|
for (const struct fun *f = funtab; f->name; f++)
|
||
|
|
printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
|
||
|
|
exit (1);
|
||
|
|
}
|
||
|
|
|
||
|
|
int
|
||
|
|
main (int argc, char *argv[])
|
||
|
|
{
|
||
|
|
int usergen = 0, gen = 'r', type = 'b', all = 0;
|
||
|
|
double lo = 0, hi = 0;
|
||
|
|
const char *tracefile = "-";
|
||
|
|
|
||
|
|
argv++;
|
||
|
|
argc--;
|
||
|
|
for (;;)
|
||
|
|
{
|
||
|
|
if (argc <= 0)
|
||
|
|
usage ();
|
||
|
|
if (argv[0][0] != '-')
|
||
|
|
break;
|
||
|
|
else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
|
||
|
|
{
|
||
|
|
usergen = 1;
|
||
|
|
lo = strtod (argv[1], 0);
|
||
|
|
hi = strtod (argv[2], 0);
|
||
|
|
argv += 3;
|
||
|
|
argc -= 3;
|
||
|
|
}
|
||
|
|
else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
|
||
|
|
{
|
||
|
|
measurecount = strtol (argv[1], 0, 0);
|
||
|
|
argv += 2;
|
||
|
|
argc -= 2;
|
||
|
|
}
|
||
|
|
else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
|
||
|
|
{
|
||
|
|
itercount = strtol (argv[1], 0, 0);
|
||
|
|
argv += 2;
|
||
|
|
argc -= 2;
|
||
|
|
}
|
||
|
|
else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
|
||
|
|
{
|
||
|
|
gen = argv[1][0];
|
||
|
|
if (strchr ("rlt", gen) == 0)
|
||
|
|
usage ();
|
||
|
|
argv += 2;
|
||
|
|
argc -= 2;
|
||
|
|
}
|
||
|
|
else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
|
||
|
|
{
|
||
|
|
gen = 't'; /* -f implies -g trace. */
|
||
|
|
tracefile = argv[1];
|
||
|
|
argv += 2;
|
||
|
|
argc -= 2;
|
||
|
|
}
|
||
|
|
else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
|
||
|
|
{
|
||
|
|
type = argv[1][0];
|
||
|
|
if (strchr ("ltb", type) == 0)
|
||
|
|
usage ();
|
||
|
|
argv += 2;
|
||
|
|
argc -= 2;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
usage ();
|
||
|
|
}
|
||
|
|
if (gen == 't')
|
||
|
|
{
|
||
|
|
readtrace (tracefile);
|
||
|
|
lo = hi = 0;
|
||
|
|
usergen = 1;
|
||
|
|
}
|
||
|
|
while (argc > 0)
|
||
|
|
{
|
||
|
|
int found = 0;
|
||
|
|
all = strcmp (argv[0], "all") == 0;
|
||
|
|
for (const struct fun *f = funtab; f->name; f++)
|
||
|
|
if (all || strcmp (argv[0], f->name) == 0)
|
||
|
|
{
|
||
|
|
found = 1;
|
||
|
|
if (!usergen)
|
||
|
|
{
|
||
|
|
lo = f->lo;
|
||
|
|
hi = f->hi;
|
||
|
|
}
|
||
|
|
bench (f, lo, hi, type, gen);
|
||
|
|
if (usergen && !all)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
if (!found)
|
||
|
|
printf ("unknown function: %s\n", argv[0]);
|
||
|
|
argv++;
|
||
|
|
argc--;
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|