-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmath_func.py
145 lines (123 loc) · 4.88 KB
/
math_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 17 15:35:42 2016
@author: shiwu_001
"""
from config import DTYPE
from numpy import int32
from pycuda.elementwise import ElementwiseKernel
from pycuda.reduction import ReductionKernel
from pycuda.tools import context_dependent_memoize
"""
Elementwise Kernel
"""
# r[i] = a  -- fill the output buffer with the scalar a
seta = ElementwiseKernel(
    arguments="float a, float *r",
    operation="r[i] = a",
    name="kernel_seta",
)
# r[i] = x[i]  -- elementwise copy
setx = ElementwiseKernel(
    arguments="float *x, float *r",
    operation="r[i] = x[i]",
    name="kernel_setx",
)
# r[i] = a*x[i] + b  -- scale and shift
axpb = ElementwiseKernel(
    arguments="float a, float *x, float b, float *r",
    operation="r[i] = a*x[i] + b",
    name="kernel_axpb",
)
# r[i] = x[i]^2  -- elementwise square
sqrx = ElementwiseKernel(
    arguments="float *x, float *r",
    operation="r[i] = x[i]*x[i]",
    name="kernel_sqrx",
)
# r[i] = sqrt(x[i])  -- elementwise square root
sqrtx = ElementwiseKernel(
    arguments="float *x, float *r",
    operation="r[i] = sqrt(x[i])",
    name="kernel_sqrtx",
)
# r[i] = x[i] / (y[i] + eps)  -- division stabilized by eps
xdivyeps = ElementwiseKernel(
    arguments="float *x, float *y, float eps, float *r",
    operation="r[i] = x[i] / (y[i] + eps)",
    name="kernel_xdivyeps",
)
# r[i] = a*x[i] + b*y[i]  -- linear combination of two vectors
axpby = ElementwiseKernel(
    arguments="float a, float *x, float b, float *y, float *r",
    operation="r[i] = a*x[i] + b*y[i]",
    name="kernel_axpby",
)
# r[i] = a*x[i] + b*y[i] + c*z[i]  -- linear combination of three vectors
axpbypcz = ElementwiseKernel(
    arguments="float a, float *x, float b, float *y, float c, float *z, float *r",
    operation="r[i] = a*x[i] + b*y[i] + c*z[i]",
    name="kernel_axpbypcz",
)
# r[i] = 1 where sign indicators differ: (x[i] > 0) XOR (y[i] > 0)
xorpos = ElementwiseKernel(
    arguments="float *x, float *y, float *r",
    operation="r[i] = (x[i]>0) != (y[i]>0)",
    name="kernel_xorpos",
)
# Piecewise-linear soft step with half-width a:
#   r[i] = 1            if x[i] > a
#   r[i] = 0            if x[i] <= -a (or a == 0 and x[i] <= a)
#   r[i] = 0.5*x/a+0.5  otherwise (linear ramp through the origin)
softlinear = ElementwiseKernel(
    arguments="float a, float *x, float *r",
    operation="r[i] = (x[i] > a) ? float(1.) : ((a == float(0.) || x[i] <= -a) ? float(0.) : (0.5 * x[i] / a + 0.5))",
    name="kernel_softlinear",
)
# r[i] = sigmoid(x[i] / (a + eps)) with eps = 1e-8 guarding a == 0
softsigmoid = ElementwiseKernel(
    arguments="float a, float *x, float *r",
    operation="r[i] = 1. / (1. + exp(-x[i] / (a + 1e-8)))",
    name="kernel_softsigmoid",
    preamble="#include <cmath>",
)
# r[i] = x[i] * y[i]  -- elementwise (Hadamard) product
eltmul = ElementwiseKernel(
    arguments="float *x, float *y, float *r",
    operation="r[i] = x[i]*y[i]",
    name="kernel_eltmul",
)
# r[i] = clip(x[i], a, b), i.e. r[i] in [a, b] with a as the lower and
# b as the upper bound.
# FIX: the original expression had the comparisons inverted --
# "(x[i] > a) ? a : ((x[i] < b) ? b : x[i])" capped values at a and
# floored them at b, effectively clipping into [b, a] instead of the
# documented [a, b].
clipab = ElementwiseKernel("float *x, float a, float b, float *r",
        "r[i] = (x[i] < a) ? a : ((x[i] > b) ? b : x[i])",
        "kernel_clipab")
"""
Reduction Kernel
"""
# (r) = sum(abs(x))
@context_dependent_memoize
def get_sumabs_kernel():
    """Build (once per CUDA context) the reduction kernel for sum(|x|)."""
    spec = dict(neutral="0.",
                reduce_expr="a+b",
                map_expr="abs(x[i])",
                arguments="float *x",
                name="kernel_sumabs")
    return ReductionKernel(DTYPE, **spec)
# (r) = sum(sqr(x))
@context_dependent_memoize
def get_sumsqr_kernel():
    """Build (once per CUDA context) the reduction kernel for sum(x^2)."""
    spec = dict(neutral="0.",
                reduce_expr="a+b",
                map_expr="x[i]*x[i]",
                arguments="float *x",
                name="kernel_sumsqr")
    return ReductionKernel(DTYPE, **spec)
# (r) = sum(pos(x) != pos(y)) w/ pos(x[i]) = x[i] > 0
@context_dependent_memoize
def get_sumxorpos_kernel():
    """Build (once per CUDA context) the int32 kernel counting sign mismatches."""
    spec = dict(neutral="0",
                reduce_expr="a+b",
                arguments="float *x, float *y",
                map_expr="(x[i] > 0) != (y[i] > 0)",
                name="kernel_sumxorpos")
    return ReductionKernel(int32, **spec)
def sumabs(x, stream=None, allocator=None):
    """Return sum(|x|) over the GPU array x as a device scalar."""
    return get_sumabs_kernel()(x, stream=stream, allocator=allocator)
def sumsqr(x, stream=None, allocator=None):
    """Return sum(x^2) over the GPU array x as a device scalar."""
    return get_sumsqr_kernel()(x, stream=stream, allocator=allocator)
def sumxorpos(x, y, stream=None, allocator=None):
    """Count positions where x and y disagree on positivity (x>0 vs y>0)."""
    return get_sumxorpos_kernel()(x, y, stream=stream, allocator=allocator)
#if __name__ == "__main__":
# import pycuda.autoinit
# import pycuda.gpuarray as garr
# import numpy as np
# a = np.arange(5, dtype=DTYPE)
# print a
# a_gpu = garr.to_gpu(a)
# b_gpu = a_gpu.copy()
# print "a =", a_gpu.get()
# print "b =", b_gpu.get()
# print "sumxorpos", sumxorpos(a_gpu, b_gpu)
# a_gpu -= 2
# print "a =", a_gpu.get()
# print "b =", b_gpu.get()
# print "sumxorpos", sumxorpos(a_gpu, b_gpu)
# soft_a = np.float32(0)
# softlinear(soft_a, a_gpu, b_gpu)
# print "softlinear(%f, a) =" % soft_a, b_gpu