Skip to content

Commit

Permalink
avoid xbyak at default
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Jun 15, 2017
1 parent 3eca85e commit dc9e71c
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 93 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ ifeq ($(AVX2),flags)
HAS_AVX2=-mavx2
endif
# ----------------------------------------------------------------
INC_DIR= -I../src -I../xbyak
INC_DIR= -I../src -I../xbyak -I./include
CFLAGS += $(INC_DIR) -O3 $(HAS_AVX2) $(ADD_OPT) -mfpmath=sse -DNDEBUG
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
CFLAGS+=$(CFLAGS_WARN)
Expand Down
14 changes: 2 additions & 12 deletions bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
#include "fmath.hpp"
#include <cmath>
#include <algorithm>

#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak_util.h"
#include <cybozu/benchmark.hpp>

inline void put(const void *p)
{
Expand All @@ -25,8 +23,6 @@ inline void puti(const void *p)
printf("{%x, %x, %x, %x}\n", i[0], i[1], i[2], i[3]);
}

static bool s_hasSSE41 = false;

float dummy(float x)
{
return x;
Expand Down Expand Up @@ -184,7 +180,7 @@ void validateExp(float (*f)(float), const char *msg, float e, bool verifyAll)

void benchmark(float (*f)(float), const char *msg, float b, float e, float d, int N, double *adj, double *pbase, int *pcount)
{
Xbyak::util::Clock clk;
cybozu::CpuClock clk;
float sum = 0;
int count = 0;
clk.begin();
Expand Down Expand Up @@ -444,12 +440,6 @@ int main(int argc, char *argv[])
{
PutVersion();
bool verifyAll = false;
Xbyak::util::Cpu cpu;
s_hasSSE41 = cpu.has(Xbyak::util::Cpu::tSSE41);

if (s_hasSSE41) {
puts("SSE41 enable");
}
argc--, argv++;
while (argc > 0) {
if (strcmp(*argv, "-all") == 0) {
Expand Down
7 changes: 3 additions & 4 deletions fastexp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ Simply create a console project, and add this file to the project.
#include <emmintrin.h>

#include "fmath.hpp"
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak_util.h"
#include <cybozu/benchmark.hpp>
/*
Useful macro definitions for memory alignment:
*/
Expand Down Expand Up @@ -978,7 +977,7 @@ void measure(performance_t *perf, double *values, size_t n)
}

for (p = perf;p->func != NULL;++p) {
Xbyak::util::Clock clk;
cybozu::CpuClock clk;
clk.begin();
p->func(p->values, n);
clk.end();
Expand Down Expand Up @@ -1012,7 +1011,7 @@ void fmath_expd(double *values, size_t n)
void benchmark(const char *str, double f(double))
{
double a = 0;
Xbyak::util::Clock clk;
cybozu::CpuClock clk;
clk.begin();
int n = 0;
for (double x = 0; x < 1; x += 1e-8) {
Expand Down
169 changes: 93 additions & 76 deletions include/cybozu/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,63 @@
#endif
#include <stdio.h>

#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
#define CYBOZU_BENCH_USE_RDTSC
#ifndef CYBOZU_BENCH_DONT_USE_RDTSC
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
#define CYBOZU_BENCH_USE_RDTSC
#define CYBOZU_BENCH_USE_CPU_TIMER
#endif
#if defined(__GNUC__) && defined(__ARM_ARCH_7A__)
// #define CYBOZU_BENCH_USE_MRC
// #define CYBOZU_BENCH_USE_CPU_TIMER
#endif
#endif
#ifdef CYBOZU_BENCH_USE_RDTSC


#include <assert.h>
#include <time.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <sys/timeb.h>
#else
#include <cybozu/time.hpp>
#endif

#ifndef CYBOZU_UNUSED
#ifdef __GNUC__
#define CYBOZU_UNUSED __attribute__((unused))
#else
#define CYBOZU_UNUSED
#endif
#endif

namespace cybozu {

#ifdef CYBOZU_BENCH_USE_RDTSC
class CpuClock {
public:
static inline uint64_t getRdtsc()
/*
	Read the current value of the timer selected at compile time.

	The source, and therefore the unit of the returned value, depends on
	which macro is defined:
	- CYBOZU_BENCH_USE_RDTSC : x86 time-stamp counter (unit: clocks)
	- CYBOZU_BENCH_USE_MRC   : 32-bit coprocessor register read on ARM
	- otherwise              : an OS timer converted to nanoseconds

	@return counter value; only differences between two calls are meaningful
	@note the two OS-timer fallbacks do not measure the same thing:
	      _ftime_s() reports the current time, while
	      clock_gettime(CLOCK_PROCESS_CPUTIME_ID) reports process CPU time.
*/
static inline uint64_t getCpuClk()
{
#ifdef CYBOZU_BENCH_USE_RDTSC
#ifdef _MSC_VER
// MSVC: use the compiler intrinsic
return __rdtsc();
#else
// GCC-style inline asm: rdtsc leaves the low 32 bits in eax and the high 32 bits in edx
unsigned int eax, edx;
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
// widen edx before shifting so the high half is not lost
return ((uint64_t)edx << 32) | eax;
#endif
#elif defined(CYBOZU_BENCH_USE_MRC)
// NOTE(review): p15, c9, c13, 0 is presumably the ARMv7 PMU cycle counter (PMCCNTR) - confirm.
// The value read is only 32 bits wide, so it wraps much sooner than the other variants.
uint32_t clk;
__asm__ volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(clk));
return clk;
#else
#ifdef _MSC_VER
// seconds and milliseconds from _ftime_s, scaled to nanoseconds (millisecond granularity)
struct _timeb timeb;
_ftime_s(&timeb);
return uint64_t(timeb.time) * 1000000000 + timeb.millitm * 1000000;
#else
struct timespec tp;
// CYBOZU_UNUSED is meant to silence the unused-variable warning on ret when assert() is compiled out (NDEBUG)
int ret CYBOZU_UNUSED = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &tp);
assert(ret == 0);
// seconds and nanoseconds combined into a single nanosecond count
return uint64_t(tp.tv_sec) * 1000000000 + tp.tv_nsec;
#endif
#endif
}
CpuClock()
Expand All @@ -44,11 +77,11 @@ class CpuClock {
}
void begin()
{
clock_ -= getRdtsc();
clock_ -= getCpuClk();
}
void end()
{
clock_ += getRdtsc();
clock_ += getCpuClk();
count_++;
}
int getCount() const { return count_; }
Expand All @@ -58,77 +91,43 @@ class CpuClock {
{
double t = getClock() / double(getCount()) / N;
if (msg && *msg) printf("%s ", msg);
#ifdef CYBOZU_BENCH_USE_CPU_TIMER
if (t > 1e6) {
printf("%7.3fMclk", t * 1e-6);
} else if (t > 1e3) {
printf("%7.3fKclk", t * 1e-3);
} else {
printf("%6.2f clk", t);
}
#else
if (t > 1e6) {
printf("%7.3fmsec", t * 1e-6);
} else if (t > 1e3) {
printf("%7.3fusec", t * 1e-3);
} else {
printf("%6.2fnsec", t);
}
#endif
if (msg && *msg) printf("\n");
}
// ad hoc constants for CYBOZU_BENCH
#ifdef CYBOZU_BENCH_USE_CPU_TIMER
static const int loopN1 = 1000;
static const int loopN2 = 1000000;
static const uint64_t maxClk = (uint64_t)3e8;
private:
uint64_t clock_;
int count_;
};
static const int loopN2 = 100;
static const uint64_t maxClk = (uint64_t)1e8;
#else
class CpuClock {
cybozu::Time t_;
static const int loopN1 = 100;
static const int loopN2 = 100;
static const uint64_t maxClk = (uint64_t)1e8;
#endif
private:
uint64_t clock_;
int count_;
public:
CpuClock() : clock_(0), count_(0) { t_.setTime(0, 0); }
void begin()
{
if (count_ == 0) t_.setCurrentTime(); // start
}
/*
@note QQQ ; this is not the same API as the rdtsc version
*/
void end()
{
cybozu::Time cur(true);
int diffSec = (int)(cur.getTime() - t_.getTime());
int diffMsec = cur.getMsec() - t_.getMsec();
const int diff = diffSec * 1000 + diffMsec;
clock_ = diff;
count_++;
}
int getCount() const { return count_; }
uint64_t getClock() const { return clock_; }
void clear() { t_.setTime(0, 0); clock_ = 0; count_ = 0; }
void put(const char *msg = 0, int N = 1) const
{
double t = getClock() / double(getCount()) / N;
if (msg && *msg) printf("%s ", msg);
if (t > 1) {
printf("%6.2fmsec", t);
} else if (t > 1e-3) {
printf("%6.2fusec", t * 1e3);
} else {
printf("%6.2fnsec", t * 1e6);
}
if (msg && *msg) printf("\n");
}
// ad hoc constants for CYBOZU_BENCH
static const int loopN1 = 1000000;
static const int loopN2 = 1000;
static const uint64_t maxClk = (uint64_t)500;
};
#endif

namespace bench {

static CpuClock g_clk;
#ifdef __GNUC__
#define CYBOZU_UNUSED __attribute__((unused))
#else
#define CYBOZU_UNUSED
#endif
static int CYBOZU_UNUSED g_loopNum;

} // cybozu::bench
Expand All @@ -139,16 +138,34 @@ static int CYBOZU_UNUSED g_loopNum;
*/
#define CYBOZU_BENCH(msg, func, ...) \
{ \
const uint64_t maxClk = cybozu::CpuClock::maxClk; \
cybozu::CpuClock clk; \
for (int i = 0; i < cybozu::CpuClock::loopN2; i++) { \
clk.begin(); \
for (int j = 0; j < cybozu::CpuClock::loopN1; j++) { func(__VA_ARGS__); } \
clk.end(); \
if (clk.getClock() > maxClk) break; \
const uint64_t _cybozu_maxClk = cybozu::CpuClock::maxClk; \
cybozu::CpuClock _cybozu_clk; \
for (int _cybozu_i = 0; _cybozu_i < cybozu::CpuClock::loopN2; _cybozu_i++) { \
_cybozu_clk.begin(); \
for (int _cybozu_j = 0; _cybozu_j < cybozu::CpuClock::loopN1; _cybozu_j++) { func(__VA_ARGS__); } \
_cybozu_clk.end(); \
if (_cybozu_clk.getClock() > _cybozu_maxClk) break; \
} \
if (msg && *msg) _cybozu_clk.put(msg, cybozu::CpuClock::loopN1); \
cybozu::bench::g_clk = _cybozu_clk; cybozu::bench::g_loopNum = cybozu::CpuClock::loopN1; \
}

/*
double clk;
CYBOZU_BENCH_T(clk, <func>, <param1>, <param2>, ...);
clk is set by CYBOZU_BENCH_T
*/
#define CYBOZU_BENCH_T(clk, func, ...) \
{ \
const uint64_t _cybozu_maxClk = cybozu::CpuClock::maxClk; \
cybozu::CpuClock _cybozu_clk; \
for (int _cybozu_i = 0; _cybozu_i < cybozu::CpuClock::loopN2; _cybozu_i++) { \
_cybozu_clk.begin(); \
for (int _cybozu_j = 0; _cybozu_j < cybozu::CpuClock::loopN1; _cybozu_j++) { func(__VA_ARGS__); } \
_cybozu_clk.end(); \
if (_cybozu_clk.getClock() > _cybozu_maxClk) break; \
} \
if (msg && *msg) clk.put(msg, cybozu::CpuClock::loopN1); \
cybozu::bench::g_clk = clk; cybozu::bench::g_loopNum = cybozu::CpuClock::loopN1; \
clk = _cybozu_clk.getClock() / (double)_cybozu_clk.getCount() / cybozu::CpuClock::loopN1; \
}

/*
Expand All @@ -158,12 +175,12 @@ static int CYBOZU_UNUSED g_loopNum;
*/
#define CYBOZU_BENCH_C(msg, _N, func, ...) \
{ \
cybozu::CpuClock clk; \
clk.begin(); \
for (int j = 0; j < _N; j++) { func(__VA_ARGS__); } \
clk.end(); \
if (msg && *msg) clk.put(msg, _N); \
cybozu::bench::g_clk = clk; cybozu::bench::g_loopNum = _N; \
cybozu::CpuClock _cybozu_clk; \
_cybozu_clk.begin(); \
for (int _cybozu_j = 0; _cybozu_j < _N; _cybozu_j++) { func(__VA_ARGS__); } \
_cybozu_clk.end(); \
if (msg && *msg) _cybozu_clk.put(msg, _N); \
cybozu::bench::g_clk = _cybozu_clk; cybozu::bench::g_loopNum = _N; \
}

} // cybozu

0 comments on commit dc9e71c

Please sign in to comment.