Skip to content

Commit

Permalink
Support newer ISPCs.
Browse files Browse the repository at this point in the history
I also had to remove some stuff that caused ISPC to crash, and a
couple of tests also fail now. This backend is a bit rickety as it
does not see much maintenance.
  • Loading branch information
athas committed Feb 4, 2025
1 parent 7ad26ab commit 1bc62e7
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 101 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

* A bug in the "sink" optimisation pass could cause compiler crashes.

* Compile errors with newer versions of `ispc`.

## [0.25.26]

### Fixed
Expand Down
22 changes: 11 additions & 11 deletions rts/c/ispc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ make_extract(uint64)
make_extract(float16)
make_extract(float)
make_extract(double)
make_extract(int8* uniform)
make_extract(int16* uniform)
make_extract(int32* uniform)
make_extract(int64* uniform)
make_extract(uint8* uniform)
make_extract(uint16* uniform)
make_extract(uint32* uniform)
make_extract(uint64* uniform)
make_extract(float16* uniform)
make_extract(float* uniform)
make_extract(double* uniform)
/* make_extract(int8* uniform) */
/* make_extract(int16* uniform) */
/* make_extract(int32* uniform) */
/* make_extract(int64* uniform) */
/* make_extract(uint8* uniform) */
/* make_extract(uint16* uniform) */
/* make_extract(uint32* uniform) */
/* make_extract(uint64* uniform) */
/* make_extract(float16* uniform) */
/* make_extract(float* uniform) */
/* make_extract(double* uniform) */
make_extract(struct futhark_context)
make_extract(struct memblock)

Expand Down
26 changes: 13 additions & 13 deletions rts/c/scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ SCALAR_FUN_ATTR uint64_t mul64(uint64_t x, uint64_t y) {
return x * y;
}

#if ISPC
#if defined(ISPC)

SCALAR_FUN_ATTR uint8_t udiv8(uint8_t x, uint8_t y) {
// This strange pattern is used to prevent the ISPC compiler from
Expand Down Expand Up @@ -1309,7 +1309,7 @@ SCALAR_FUN_ATTR uint8_t futrts_smul_hi8 ( int8_t a, int8_t b) { return ((int16_
SCALAR_FUN_ATTR uint16_t futrts_smul_hi16(int16_t a, int16_t b) { return ((int32_t)a) * ((int32_t)b) >> 16; }
SCALAR_FUN_ATTR uint32_t futrts_smul_hi32(int32_t a, int32_t b) { return __mulhi(a, b); }
SCALAR_FUN_ATTR uint64_t futrts_smul_hi64(int64_t a, int64_t b) { return __mul64hi(a, b); }
#elif ISPC
#elif defined(ISPC)
SCALAR_FUN_ATTR uint8_t futrts_umul_hi8(uint8_t a, uint8_t b) { return ((uint16_t)a) * ((uint16_t)b) >> 8; }
SCALAR_FUN_ATTR uint16_t futrts_umul_hi16(uint16_t a, uint16_t b) { return ((uint32_t)a) * ((uint32_t)b) >> 16; }
SCALAR_FUN_ATTR uint32_t futrts_umul_hi32(uint32_t a, uint32_t b) { return ((uint64_t)a) * ((uint64_t)b) >> 32; }
Expand Down Expand Up @@ -1430,7 +1430,7 @@ SCALAR_FUN_ATTR int32_t futrts_clzz64(int64_t x) {
return __clzll(x);
}

#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR int32_t futrts_clzz8(int8_t x) {
return count_leading_zeros((int32_t)(uint8_t)x)-24;
Expand Down Expand Up @@ -1518,7 +1518,7 @@ SCALAR_FUN_ATTR int32_t futrts_ctzz64(int64_t x) {
return y == 0 ? 64 : y - 1;
}

#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR int32_t futrts_ctzz8(int8_t x) {
return x == 0 ? 8 : count_trailing_zeros((int32_t)x);
Expand Down Expand Up @@ -1628,7 +1628,7 @@ SCALAR_FUN_ATTR float fpow32(float x, float y) {
return pow(x, y);
}

#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR float fabs32(float x) {
return abs(x);
Expand All @@ -1645,7 +1645,7 @@ SCALAR_FUN_ATTR float fmin32(float x, float y) {
SCALAR_FUN_ATTR float fpow32(float a, float b) {
float ret;
foreach_active (i) {
uniform float r = __stdlib_powf(extract(a, i), extract(b, i));
uniform float r = pow(extract(a, i), extract(b, i));
ret = insert(ret, i, r);
}
return ret;
Expand Down Expand Up @@ -1674,7 +1674,7 @@ SCALAR_FUN_ATTR bool futrts_isnan32(float x) {
return isnan(x);
}

#if ISPC
#if defined(ISPC)

SCALAR_FUN_ATTR bool futrts_isinf32(float x) {
return !isnan(x) && isnan(x - x);
Expand Down Expand Up @@ -1905,7 +1905,7 @@ SCALAR_FUN_ATTR float futrts_fma32(float a, float b, float c) {
return fma(a, b, c);
}

#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR float futrts_log32(float x) {
return futrts_isfinite32(x) || (futrts_isinf32(x) && x < 0)? log(x) : x;
Expand Down Expand Up @@ -2107,7 +2107,7 @@ SCALAR_FUN_ATTR float futrts_lerp32(float v0, float v1, float t) {
}

SCALAR_FUN_ATTR float futrts_ldexp32(float x, int32_t y) {
return x * pow((double)2.0, (double)y);
return x * pow((uniform float)2.0, (float)y);
}

SCALAR_FUN_ATTR float futrts_copysign32(float x, float y) {
Expand Down Expand Up @@ -2267,7 +2267,7 @@ SCALAR_FUN_ATTR float futrts_fma32(float a, float b, float c) {
}
#endif

#if ISPC
#if defined(ISPC)
SCALAR_FUN_ATTR int32_t futrts_to_bits32(float x) {
return intbits(x);
}
Expand Down Expand Up @@ -2306,7 +2306,7 @@ SCALAR_FUN_ATTR float fsignum32(float x) {
SCALAR_FUN_ATTR double futrts_from_bits64(int64_t x);
SCALAR_FUN_ATTR int64_t futrts_to_bits64(double x);

#if ISPC
#if defined(ISPC)
SCALAR_FUN_ATTR bool futrts_isinf64(float x) {
return !isnan(x) && isnan(x - x);
}
Expand Down Expand Up @@ -2386,7 +2386,7 @@ SCALAR_FUN_ATTR double fmin64(double x, double y) {
SCALAR_FUN_ATTR double fpow64(double a, double b) {
float ret;
foreach_active (i) {
uniform float r = __stdlib_powf(extract(a, i), extract(b, i));
uniform float r = pow(extract(a, i), extract(b, i));
ret = insert(ret, i, r);
}
return ret;
Expand Down Expand Up @@ -2673,7 +2673,7 @@ SCALAR_FUN_ATTR double futrts_lerp64(double v0, double v1, double t) {
}

SCALAR_FUN_ATTR double futrts_ldexp64(double x, int32_t y) {
return x * pow((double)2.0, (double)y);
return x * pow((uniform double)2.0, (double)y);
}

SCALAR_FUN_ATTR double futrts_copysign64(double x, double y) {
Expand Down
12 changes: 6 additions & 6 deletions rts/c/scalar_f16.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
// compiler will have to be real careful!
typedef float f16;

#elif ISPC
#elif defined(ISPC)
typedef float16 f16;

#else
Expand Down Expand Up @@ -154,7 +154,7 @@ SCALAR_FUN_ATTR f16 fpow16(f16 x, f16 y) {
return pow(x, y);
}

#elif ISPC
#elif defined(ISPC)
SCALAR_FUN_ATTR f16 fabs16(f16 x) {
return abs(x);
}
Expand Down Expand Up @@ -190,7 +190,7 @@ SCALAR_FUN_ATTR f16 fpow16(f16 x, f16 y) {
}
#endif

#if ISPC
#if defined(ISPC)
SCALAR_FUN_ATTR bool futrts_isinf16(float x) {
return !futrts_isnan16(x) && futrts_isnan16(x - x);
}
Expand Down Expand Up @@ -345,7 +345,7 @@ SCALAR_FUN_ATTR f16 futrts_mad16(f16 a, f16 b, f16 c) {
SCALAR_FUN_ATTR f16 futrts_fma16(f16 a, f16 b, f16 c) {
return fma(a, b, c);
}
#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR f16 futrts_log16(f16 x) {
return futrts_isfinite16(x) || (futrts_isinf16(x) && x < 0) ? log(x) : x;
Expand Down Expand Up @@ -664,7 +664,7 @@ SCALAR_FUN_ATTR int16_t futrts_to_bits16(f16 x) {
SCALAR_FUN_ATTR f16 futrts_from_bits16(int16_t x) {
return __ushort_as_half(x);
}
#elif ISPC
#elif defined(ISPC)

SCALAR_FUN_ATTR int16_t futrts_to_bits16(f16 x) {
varying int16_t y = *((varying int16_t * uniform)&x);
Expand Down Expand Up @@ -916,7 +916,7 @@ SCALAR_FUN_ATTR double fpconv_f16_f64(f16 x) {
return (double) x;
}

#if ISPC
#if defined(ISPC)
SCALAR_FUN_ATTR f16 fpconv_f64_f16(double x) {
return (f16) ((float)x);
}
Expand Down
9 changes: 4 additions & 5 deletions rts/c/uniform.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@

// Start of uniform.h

// Uniform versions of all library functions, provided to
// improve performance in ISPC when in a uniform context.

#if ISPC
#if defined(ISPC)

static inline uniform uint8_t add8(uniform uint8_t x, uniform uint8_t y) {
return x + y;
Expand Down Expand Up @@ -839,7 +838,7 @@ static inline uniform float fmin32(uniform float x, uniform float y) {
}

static inline uniform float fpow32(uniform float x, uniform float y) {
return __stdlib_powf(x, y);
return pow(x, y);
}

static inline uniform bool futrts_isnan32(uniform float x) {
Expand Down Expand Up @@ -1181,7 +1180,7 @@ static inline uniform double fmin64(uniform double x, uniform double y) {
}

static inline uniform double fpow64(uniform double x, uniform double y) {
return __stdlib_powf(x, y);
return pow(x, y);
}

static inline uniform double futrts_log64(uniform double x) {
Expand Down Expand Up @@ -1445,7 +1444,7 @@ static inline uniform double fpconv_f16_f64(uniform f16 x) {
}

static inline uniform f16 fpconv_f64_f16(uniform double x) {
return (uniform f16) ((uniform float)x);
return (uniform f16) ((uniform float)x);
}

#endif
Expand Down
2 changes: 1 addition & 1 deletion src/Futhark/CodeGen/Backends/MulticoreC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ compileSetRetvalStructValues struct vnames we = concat $ zipWith field vnames we
where
field name (ct, Prim _) =
[C.cstms|$id:struct.$id:(closureRetvalStructField name)=(($ty:ct*)&$id:name);
$escstm:("#if ISPC")
$escstm:("#if defined(ISPC)")
$id:struct.$id:(closureRetvalStructField name)+= programIndex;
$escstm:("#endif")|]
field name (_, MemBlock) =
Expand Down
2 changes: 1 addition & 1 deletion src/Futhark/CodeGen/Backends/MulticoreISPC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ compileOp (SegOp name params seq_task par_task retvals (SchedulerInfo e sched))
aos_name <- newVName "aos"
GC.items
[C.citems|
$escstm:("#if ISPC")
$escstm:("#if defined(ISPC)")
$tyqual:uniform struct $id:fstruct $id:aos_name[programCount];
$id:aos_name[programIndex] = $id:(fstruct <> "_");
$escstm:("foreach_active (i)")
Expand Down
103 changes: 50 additions & 53 deletions tests/ad/issue1473.fut
Original file line number Diff line number Diff line change
@@ -1,72 +1,69 @@
-- test mpr sim with ad for params
-- ==

def pi = 3.141592653589793f32

-- some type abbreviations
type mpr_pars = {G: f32, I: f32, Delta: f32, eta: f32, tau: f32, J: f32}
type mpr_node = (f32, f32)
type mpr_net [n] = [n] mpr_node
type mpr_net [n] = [n]mpr_node

-- this is transposed from mpr-pdq to avoid transposes in history update
type mpr_hist [t] [n] = [t] mpr_net [n]
type mpr_hist [t] [n] = [t]mpr_net [n]
type connectome [n] = {weights: [n][n]f32, idelays: [n][n]i64}

-- do one time step w/ Euler
def mpr_step [t] [n] (now: i64) (dt: f32) (buf: *mpr_hist[t][n]) (conn: connectome[n]) (p: mpr_pars): *mpr_hist[t][n] =

-- define individual derivatives as in mpr pdq
let dr r V = 1/p.tau * ( p.Delta / (pi * p.tau) + 2 * V * r)
let dV r V r_c = 1/p.tau * ( V**2 - pi**2 * p.tau**2 * r**2 + p.eta + p.J * p.tau * r + p.I + r_c)
let dfun (r, V, c) = (dr r V, dV r V c)

-- unpack current state for clarity
let (r, V) = last buf |> unzip

-- connectivity eval
let r_c_i i w d = map2 (\wj dj -> wj * buf[now - dj, i].0) w d |> reduce (+) 0f32 |> (*p.G)
let r_c = map3 r_c_i (iota n) conn.weights conn.idelays

-- Euler step
let erV = map3 (\r V c -> (dr r V, dV r V c)) r V r_c
|> map2 (\(r, V) (dr, dV) -> (r + dt * dr, V + dt * dV)) (last buf)
|> map1 (\(r, V) -> (if r >= 0f32 then r else 0f32, V))

-- now for the Heun step
let (er, eV) = unzip erV
let hrV = map3 (\r V c -> (dr r V, dV r V c)) er eV r_c
|> map2 (\(r, V) (dr, dV) -> (r + dt * dr, V + dt * dV)) (last buf)
|> map1 (\(r, V) -> (if r >= 0f32 then r else 0f32, V))

-- return updated buffer
in buf with [now + 1] = copy hrV
def mpr_step [t] [n] (now: i64) (dt: f32) (buf: *mpr_hist [t] [n]) (conn: connectome [n]) (p: mpr_pars) : *mpr_hist [t] [n] =
-- define individual derivatives as in mpr pdq
let dr r V = 1 / p.tau * (p.Delta / (pi * p.tau) + 2 * V * r)
let dV r V r_c = 1 / p.tau * (V ** 2 - pi ** 2 * p.tau ** 2 * r ** 2 + p.eta + p.J * p.tau * r + p.I + r_c)
let dfun (r, V, c) = (dr r V, dV r V c)
-- unpack current state for clarity
let (r, V) = last buf |> unzip
-- connectivity eval
let r_c_i i w d = map2 (\wj dj -> wj * buf[now - dj, i].0) w d |> reduce (+) 0f32 |> (* p.G)
let r_c = map3 r_c_i (iota n) conn.weights conn.idelays
-- Euler step
let erV =
map3 (\r V c -> (dr r V, dV r V c)) r V r_c
|> map2 (\(r, V) (dr, dV) -> (r + dt * dr, V + dt * dV)) (last buf)
|> map1 (\(r, V) -> (if r >= 0f32 then r else 0f32, V))
-- now for the Heun step
let (er, eV) = unzip erV
let hrV =
map3 (\r V c -> (dr r V, dV r V c)) er eV r_c
|> map2 (\(r, V) (dr, dV) -> (r + dt * dr, V + dt * dV)) (last buf)
|> map1 (\(r, V) -> (if r >= 0f32 then r else 0f32, V))
-- return updated buffer
in buf with [now + 1] = copy hrV

def run_mpr [t] [n] (horizon: i64) (dt: f32) (buf: mpr_hist[t][n]) (conn: connectome[n]) (p: mpr_pars): mpr_hist[t][n] =
loop buf = copy buf
for now < (t - horizon - 1) do mpr_step (now + horizon) dt buf conn p
def run_mpr [t] [n] (horizon: i64) (dt: f32) (buf: mpr_hist [t] [n]) (conn: connectome [n]) (p: mpr_pars) : mpr_hist [t] [n] =
loop buf = copy buf
for now < (t - horizon - 1) do
mpr_step (now + horizon) dt buf conn p

def mpr_pars_with_G (p: mpr_pars) (new_G: f32): mpr_pars =
let new_p = copy p
in new_p with G = new_G
def mpr_pars_with_G (p: mpr_pars) (new_G: f32) : mpr_pars =
let new_p = copy p
in new_p with G = new_G

def loss [t] [n] (x:mpr_hist[t][n]): f32 =
let r = map unzip x[t-10:] |> unzip |> (.0)
let sum = map (reduce (+) 0f32) r |> reduce (+) 0f32
in
sum
def loss [t] [n] (x: mpr_hist [t] [n]) : f32 =
let r = map unzip x[t - 10:] |> unzip |> (.0)
let sum = map (reduce (+) 0f32) r |> reduce (+) 0f32
in sum

def sweep [t] [n] (ng: i64) (horizon: i64) (dt: f32) (buf: mpr_hist[t][n]) (conn: connectome[n]) (p: mpr_pars): [ng]f32 =
let Gs = tabulate ng (\i -> 0.0 + (f32.i64 i) * 0.1)
let do_one G = run_mpr horizon dt buf conn (mpr_pars_with_G p G) |> loss
in map (\g -> vjp do_one g 1f32) Gs
def sweep [t] [n] (ng: i64) (horizon: i64) (dt: f32) (buf: mpr_hist [t] [n]) (conn: connectome [n]) (p: mpr_pars) : [ng]f32 =
let Gs = tabulate ng (\i -> 0.0 + (f32.i64 i) * 0.1)
let do_one G = run_mpr horizon dt buf conn (mpr_pars_with_G p G) |> loss
in map (\g -> vjp do_one g 1f32) Gs

-- ==
-- compiled input { 1i64 5i64 10i64 7i64 }
-- no_ispc compiled input { 1i64 5i64 10i64 7i64 }
-- output { [0.000086f32] }
def main (ng: i64) (nh: i64) (nt: i64) (nn: i64) =
let dt = 0.01f32
let buf = tabulate_2d (nt + nh) nn (\i j -> (0.1f32, -2.0f32))
let conn = {weights=tabulate_2d nn nn (\i j -> 0.1f32),
idelays=tabulate_2d nn nn (\i j -> ((i * j) % nh))
}
let p = {G=0.1f32, I=0.0f32, Delta=0.7f32, eta=(-4.6f32), tau=1.0f32, J=14.5f32}
in sweep ng nh dt buf conn p
let dt = 0.01f32
let buf = tabulate_2d (nt + nh) nn (\i j -> (0.1f32, -2.0f32))
let conn =
{ weights = tabulate_2d nn nn (\i j -> 0.1f32)
, idelays = tabulate_2d nn nn (\i j -> ((i * j) % nh))
}
let p = {G = 0.1f32, I = 0.0f32, Delta = 0.7f32, eta = (-4.6f32), tau = 1.0f32, J = 14.5f32}
in sweep ng nh dt buf conn p
Loading

0 comments on commit 1bc62e7

Please sign in to comment.