
Commit 36107bf

Add conv2d/3d depthwise backup kernels (#1464)
Op list:
- [x] conv_depthwise2d_forward
- [x] conv_depthwise2d_backward
- [x] conv_depthwise3d_forward
- [x] conv_depthwise3d_backward

Tip: No registration.

---------

Co-authored-by: Yutao Xu <[email protected]>
1 parent 299b9e8 commit 36107bf

6 files changed: +2535 / -0 lines changed
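For orientation on the op list above: a depthwise convolution is a grouped convolution with groups equal to the number of input channels, one single-channel filter per group. The following is a minimal, hypothetical smoke test, not part of this commit; it assumes an XPU-enabled libtorch build, and whether the call actually reaches the new kernels depends on how the convolution dispatcher selects backends for grouped convolutions on XPU tensors.

// Hypothetical smoke test, not part of this commit. Assumes an XPU-enabled
// libtorch build; backend selection for grouped convs decides whether this
// reaches the depthwise kernels added here.
#include <torch/torch.h>
#include <iostream>

int main() {
  auto opts = torch::TensorOptions().device(c10::DeviceType::XPU);
  auto input = torch::randn({2, 8, 16, 16}, opts);  // N, C, H, W
  auto weight = torch::randn({8, 1, 3, 3}, opts);   // C, 1, kH, kW (depthwise)

  // groups == in_channels makes this a depthwise convolution.
  auto out = torch::conv2d(input, weight, /*bias=*/{}, /*stride=*/{1, 1},
                           /*padding=*/{1, 1}, /*dilation=*/{1, 1},
                           /*groups=*/8);
  std::cout << out.sizes() << "\n";  // expect [2, 8, 16, 16]
  return 0;
}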
+150 lines (new file)
@@ -0,0 +1,150 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/AccumulateType.h>
#include <ATen/Dispatch.h>
#include <ATen/core/Tensor.h>
#include <ATen/div_rtn.h>
#include <ATen/native/ConvUtils.h>
#include <ATen/native/IndexingUtils.h>
#include <ATen/native/Resize.h>
#include <ATen/native/xpu/sycl/DepthwiseConv2dKernels.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_conv_depthwise2d_native.h>
#include <ATen/ops/empty.h>
#endif

namespace at::native {
Tensor& conv_depthwise2d_xpu_out(
    const Tensor& input_,
    const Tensor& weight_,
    IntArrayRef kernel_size,
    const std::optional<Tensor>& bias_opt,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    Tensor& out) {
  TORCH_CHECK(kernel_size.size() == 2);
  TORCH_CHECK(stride.size() == 2);
  TORCH_CHECK(padding.size() == 2);
  TORCH_CHECK(dilation.size() == 2);

  auto input = input_.expect_contiguous();
  auto weight = weight_.expect_contiguous();
  auto bias = [&] {
    if (bias_opt.has_value() && bias_opt->defined()) {
      return bias_opt->expect_contiguous();
    }
    return c10::MaybeOwned<Tensor>::owned(std::in_place);
  }();

  xpu::conv_depthwise2d_forward_kernel(
      *input,
      out,
      *weight,
      *bias,
      kernel_size[1],
      kernel_size[0],
      stride[1],
      stride[0],
      padding[1],
      padding[0],
      dilation[1],
      dilation[0]);
  return out;
}

Tensor conv_depthwise2d_xpu(
    const Tensor& input,
    const Tensor& weight,
    IntArrayRef kernel_size,
    const std::optional<Tensor>& bias,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation) {
  auto out = at::empty({0}, input.options());
  return conv_depthwise2d_xpu_out(
      input, weight, kernel_size, bias, stride, padding, dilation, out);
}

std::tuple<Tensor&, Tensor&> conv_depthwise2d_backward_xpu_out(
    const Tensor& grad_output_,
    const Tensor& self_,
    const Tensor& weight_,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    Tensor& grad_input,
    Tensor& grad_weight) {
  auto grad_output = grad_output_.expect_contiguous();

  if (grad_weight.defined()) {
    auto self = self_.expect_contiguous();
    xpu::conv_depthwise2d_grad_weight_kernel(
        *self,
        *grad_output,
        grad_weight,
        kernel_size[1],
        kernel_size[0],
        stride[1],
        stride[0],
        padding[1],
        padding[0],
        dilation[1],
        dilation[0]);
  }

  if (grad_input.defined()) {
    auto weight = weight_.expect_contiguous();
    xpu::conv_depthwise2d_backward_kernel(
        self_,
        *grad_output,
        grad_input,
        *weight,
        kernel_size[1],
        kernel_size[0],
        stride[1],
        stride[0],
        padding[1],
        padding[0],
        dilation[1],
        dilation[0]);
  }
  return std::forward_as_tuple(grad_input, grad_weight);
}

std::tuple<Tensor, Tensor> conv_depthwise2d_backward_xpu(
    const Tensor& grad_output,
    const Tensor& self,
    const Tensor& weight,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    std::array<bool, 2> output_mask) {
  Tensor grad_input;
  Tensor grad_weight;

  if (output_mask[0]) {
    grad_input = at::empty({0}, grad_output.options());
  }

  if (output_mask[1]) {
    grad_weight = at::empty({0}, grad_output.options());
  }
  return conv_depthwise2d_backward_xpu_out(
      grad_output,
      self,
      weight,
      kernel_size,
      stride,
      padding,
      dilation,
      grad_input,
      grad_weight);
}

} // namespace at::native
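In the wrappers above, kernel_size, stride, padding, and dilation are 2-element arrays (conventionally {height, width}), and index [1] is forwarded before index [0] to the SYCL kernels. The sketch below is a hedged illustration, not part of this commit, of calling the dispatcher-level at::_conv_depthwise2d op with matching argument order; the _conv_depthwise2d_native.h include suggests these functions back that schema, though the actual registration lives outside this file.

// Hedged sketch (not part of this commit): a direct call to the ATen op whose
// schema the wrappers above appear to implement. Argument order matches
// conv_depthwise2d_xpu: input, weight, kernel_size, bias, stride, padding,
// dilation, each spatial argument given as a {height, width} pair.
#include <ATen/ATen.h>

at::Tensor depthwise2d_forward_sketch(
    const at::Tensor& input,    // N, C, H, W
    const at::Tensor& weight) { // C, 1, 3, 3 for this example
  return at::_conv_depthwise2d(
      input,
      weight,
      /*kernel_size=*/{3, 3},
      /*bias=*/{},
      /*stride=*/{1, 1},
      /*padding=*/{1, 1},
      /*dilation=*/{1, 1});
}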
+95 lines (new file)
@@ -0,0 +1,95 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/AccumulateType.h>
#include <ATen/Dispatch.h>
#include <ATen/TensorUtils.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/ConvUtils.h>
#include <ATen/native/xpu/sycl/DepthwiseConv3dKernels.h>
#include <ATen/native/xpu/sycl/KernelUtils.h>
#include <comm/SYCLContext.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/conv_depthwise3d_native.h>
#include <ATen/ops/empty.h>
#endif

#include <algorithm>
#include <limits>
#include <tuple>

namespace at::native {
Tensor conv_depthwise3d_xpu(
    const Tensor& input,
    const Tensor& weight,
    IntArrayRef kernel_size,
    const std::optional<Tensor>& bias_opt,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation) {
  return xpu::conv_depthwise3d_kernel(
      input, weight, kernel_size, bias_opt, stride, padding, dilation);
}

std::tuple<Tensor&, Tensor&, Tensor&> conv_depthwise3d_backward_xpu_out(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& weight,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    Tensor& grad_input,
    Tensor& grad_weight,
    Tensor& grad_bias) {
  if (grad_weight.defined()) {
    grad_weight.resize_(weight.sizes());
    grad_weight.zero_();
  }
  return xpu::_depthwise_3d_backward_kernel(
      grad_input,
      grad_weight,
      grad_bias,
      grad_output,
      input,
      weight,
      kernel_size,
      stride,
      padding,
      dilation,
      {true, true, true});
}

std::tuple<Tensor, Tensor, Tensor> conv_depthwise3d_backward_xpu(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& weight,
    IntArrayRef kernel_size,
    IntArrayRef stride,
    IntArrayRef padding,
    IntArrayRef dilation,
    const std::array<bool, 3> output_mask) {
  auto options = grad_output.options();
  Tensor grad_input =
      (output_mask[0] ? at::empty(input.sizes(), options) : Tensor());
  Tensor grad_weight =
      (output_mask[1] ? at::empty(weight.sizes(), options) : Tensor());
  Tensor grad_bias; /* undefined temporarily */

  return xpu::_depthwise_3d_backward_kernel(
      grad_input,
      grad_weight,
      grad_bias,
      grad_output,
      input,
      weight,
      kernel_size,
      stride,
      padding,
      dilation,
      output_mask);
}

} // namespace at::native
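The functional 3D backward above allocates grad_input and grad_weight only for the positions requested in output_mask and leaves grad_bias undefined before handing everything to the SYCL kernel. Below is a hedged end-to-end sketch, not part of this commit, that would exercise both the 3D forward and backward through autograd, assuming an XPU-enabled libtorch build.

// Hypothetical round trip for the 3D path; assumes an XPU-enabled build.
// groups == in_channels makes conv3d depthwise; backward() should reach the
// backward wrappers above with an output_mask reflecting which inputs
// require gradients.
#include <torch/torch.h>

void depthwise3d_roundtrip() {
  auto opts = torch::TensorOptions().device(c10::DeviceType::XPU);
  auto input =
      torch::randn({1, 4, 8, 8, 8}, opts).requires_grad_(true);   // N, C, D, H, W
  auto weight =
      torch::randn({4, 1, 3, 3, 3}, opts).requires_grad_(true);   // C, 1, kD, kH, kW

  auto out = torch::conv3d(input, weight, /*bias=*/{}, /*stride=*/{1, 1, 1},
                           /*padding=*/{1, 1, 1}, /*dilation=*/{1, 1, 1},
                           /*groups=*/4);
  out.sum().backward();  // fills input.grad() and weight.grad()
}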
