v2.3.3: fix some problems in int8
FindDefinition committed Feb 2, 2023
1 parent b52636d commit 2309ebe
Showing 8 changed files with 73 additions and 36 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
# Changelog

## [2.3.3] - 2023-02-02
### Fixed
- Fix int8 NVRTC error when using prebuilt wheels
- Fix int8 kernels when running on Turing GPUs
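
A minimal sketch, assuming PyTorch with CUDA support is installed, for checking whether the local GPU is a Turing device (compute capability 7.5), the architecture targeted by the second fix:

```python
import torch

# Assumption: PyTorch with CUDA support and at least one visible GPU.
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    # Turing GPUs (e.g. RTX 20xx, T4) report compute capability 7.5.
    print(f"sm_{major}{minor}, Turing: {(major, minor) == (7, 5)}")
else:
    print("No CUDA device found")
```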

## [2.3.2] - 2023-01-20
### Changed
- change version
6 changes: 2 additions & 4 deletions README.md
@@ -57,11 +57,9 @@
| CUDA 11.4 | [![PyPI Version][pypi-ver-114]][pypi-url-114] | ```pip install spconv-cu114```| [![pypi monthly download][pypi-download-114]][pypi-url-114]|
| CUDA 11.6 | [![PyPI Version][pypi-ver-116]][pypi-url-116] | ```pip install spconv-cu116```| [![pypi monthly download][pypi-download-116]][pypi-url-116]|
| CUDA 11.7 | [![PyPI Version][pypi-ver-117]][pypi-url-117] | ```pip install spconv-cu117```| [![pypi monthly download][pypi-download-117]][pypi-url-117]|
| CUDA 11.8* | [![PyPI Version][pypi-ver-118]][pypi-url-118] | ```pip install spconv-cu118```| [![pypi monthly download][pypi-download-118]][pypi-url-118]|
| CUDA 11.8 | [![PyPI Version][pypi-ver-118]][pypi-url-118] | ```pip install spconv-cu118```| [![pypi monthly download][pypi-download-118]][pypi-url-118]|
| CUDA 12.0 | [![PyPI Version][pypi-ver-120]][pypi-url-120] | ```pip install spconv-cu120```| [![pypi monthly download][pypi-download-120]][pypi-url-120]|
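
The table above maps each CUDA release to its wheel name. A small illustrative helper, assuming PyTorch is installed, that derives the matching package name from the CUDA version PyTorch was built against (this mapping only covers the versions listed above and is not an official spconv utility):

```python
import torch

# Illustrative only: derive the spconv wheel name from torch's CUDA version.
cuda = torch.version.cuda            # e.g. "11.7"; None for CPU-only builds
if cuda is not None:
    tag = cuda.replace(".", "")      # "11.7" -> "117"
    print(f"pip install spconv-cu{tag}")
else:
    print("pip install spconv")      # CPU-only package
```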

*: sm_89 and sm_90 are added in CUDA 11.8. If you use an RTX 4090 or H100, you should use this version.

<!-- | CUDA 12.0 | [![PyPI Version][pypi-ver-120]][pypi-url-120] | ```pip install spconv-cu120```| [![pypi monthly download][pypi-download-120]][pypi-url-120]| -->

```spconv``` is a project that provides a heavily optimized sparse convolution implementation with tensor core support. Check the [benchmark](docs/BENCHMARK.md) to see how fast spconv 2.x runs.
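
A minimal usage sketch with the spconv 2.x PyTorch API, assuming a CUDA GPU is available (shapes and values below are made up for illustration):

```python
import torch
import spconv.pytorch as spconv

# A tiny submanifold sparse 3D convolution layer.
net = spconv.SubMConv3d(in_channels=16, out_channels=32, kernel_size=3,
                        indice_key="subm0").cuda()

# 100 unique active voxels on a 16x16x16 grid, coordinates as [batch, z, y, x].
zyx = torch.cartesian_prod(torch.arange(16), torch.arange(16), torch.arange(16))
zyx = zyx[torch.randperm(len(zyx))[:100]]
indices = torch.cat([torch.zeros(100, 1, dtype=torch.long), zyx], dim=1)
indices = indices.int().cuda()
features = torch.randn(100, 16, device="cuda")

x = spconv.SparseConvTensor(features, indices, spatial_shape=[16, 16, 16],
                            batch_size=1)
out = net(x)          # another SparseConvTensor
dense = out.dense()   # densify for inspection, shape [1, 32, 16, 16, 16]
```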

1 change: 1 addition & 0 deletions pyproject.toml
@@ -1,4 +1,5 @@
[build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.4.5"]
# requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm @ file:///io/dist/cumm_cu120-0.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"]
# requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm @ file:///io/dist/cumm_cu117-0.4.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"]
build-backend = "setuptools.build_meta"
4 changes: 2 additions & 2 deletions setup.py
@@ -167,8 +167,8 @@ def run(self):
all_shuffle = SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS + SHUFFLE_AMPERE_PARAMS
all_imp = (IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS +
IMPLGEMM_TURING_PARAMS + IMPLGEMM_AMPERE_PARAMS)
all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
# all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
# all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))

cu = GemmMainUnitTest(all_shuffle)
convcu = ConvMainUnitTest(all_imp)
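
The two commented-out lines above previously removed NVRTC-flagged kernel parameters from the ahead-of-time build, which is why prebuilt wheels lacked the new int8 kernels (the int8 entries added in spconv/core.py below all set is_nvrtc=True). A self-contained sketch of what that filter did, using a stand-in dataclass rather than spconv's real parameter types:

```python
from dataclasses import dataclass

@dataclass
class KernelParam:      # stand-in for spconv's real GEMM/conv parameter objects
    name: str
    is_nvrtc: bool

all_imp = [KernelParam("imp_f16", is_nvrtc=False),
           KernelParam("imp_int8", is_nvrtc=True)]

# Old behaviour (the now-commented filter): NVRTC kernels were excluded from
# the ahead-of-time build, so prebuilt wheels did not contain them.
aot_only = list(filter(lambda p: not p.is_nvrtc, all_imp))  # -> [imp_f16]

# New behaviour: every parameter, including the NVRTC int8 ones, is passed to
# GemmMainUnitTest / ConvMainUnitTest and compiled into the wheel.
aot_all = all_imp                                           # -> both entries
print([p.name for p in aot_only], [p.name for p in aot_all])
```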
82 changes: 56 additions & 26 deletions spconv/core.py
@@ -840,7 +840,7 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
Expand All @@ -855,7 +855,7 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=0,
@@ -1127,7 +1127,7 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
Expand All @@ -1142,7 +1142,7 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
@@ -1157,13 +1157,13 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),
*gen_conv_params(ConvFwdAndBwdInput, (64, 64, 64), (32, 32, 64),
*gen_conv_params(ConvFwdAndBwdInput, (32, 32, 32), (16, 32, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1172,14 +1172,13 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (64, 128, 64), (32, 64, 64),
*gen_conv_params(ConvFwdAndBwdInput, (32, 32, 32), (16, 16, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1188,14 +1187,13 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (64, 128, 32), (32, 64, 32),
*gen_conv_params(ConvFwdAndBwdInput, (32, 32, 32), (32, 16, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1204,14 +1202,14 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (128, 64, 64), (64, 32, 64),
*gen_conv_params(ConvFwdAndBwdInput, (64, 64, 64), (32, 32, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1220,14 +1218,14 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),
# TODO 16,8,32 produce wrong result.
*gen_conv_params(ConvFwdAndBwdInput, (128, 64, 32), (64, 32, 32),

*gen_conv_params(ConvFwdAndBwdInput, (64, 128, 64), (32, 64, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1236,14 +1234,30 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 16)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (64, 128, 32), (32, 64, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
["s8,s8,s8,s32,f32", "s8,s8,s8,s32,f16"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (128, 256, 64), (64, 128, 64),
*gen_conv_params(ConvFwdAndBwdInput, (128, 64, 64), (64, 32, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1252,14 +1266,14 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (256, 128, 64), (128, 64, 64),
# TODO 16,8,32 produce wrong result.
*gen_conv_params(ConvFwdAndBwdInput, (128, 64, 32), (64, 32, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1268,14 +1282,14 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (128, 128, 128), (64, 64, 128),
*gen_conv_params(ConvFwdAndBwdInput, (128, 256, 64), (64, 128, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
@@ -1284,13 +1298,29 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (256, 128, 64), (128, 64, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
["s8,s8,s8,s32,f32", "s8,s8,s8,s32,f16"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
is_nvrtc=True,
int8_inference=True),

*gen_conv_params(ConvFwdAndBwdInput, (128, 128, 64), (64, 64, 64),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
@@ -1300,7 +1330,7 @@ class AlgoHint(Enum):
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 32)),
TensorOp((8, 8, 16)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1,
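
Throughout this file the int8 Turing configurations move from TensorOp((16, 8, 16)) and TensorOp((16, 8, 32)) to TensorOp((8, 8, 16)), with correspondingly smaller tile sizes. A plausible reading, inferred from the changelog entry about Turing rather than stated in the diff, is that the larger m16n8kXX int8 MMA shapes need sm_80 or newer, while m8n8k16 is the int8 tensor-op shape available on Turing (sm_75). A hypothetical helper illustrating that kind of capability-based choice (not spconv's actual dispatch logic):

```python
import torch

def pick_int8_mma_shape(device: int = 0) -> tuple:
    """Hypothetical: choose an int8 MMA (m, n, k) shape by compute capability."""
    major, minor = torch.cuda.get_device_capability(device)
    if (major, minor) >= (8, 0):
        return (16, 8, 32)   # larger int8 MMA shapes available on sm_80+
    return (8, 8, 16)        # int8 tensor-op shape usable on Turing (sm_75)

print(pick_int8_mma_shape())
```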
7 changes: 5 additions & 2 deletions test/test_all_algo.py
@@ -330,17 +330,20 @@ def _test_impgemm_conv_cuda(subm: bool):
device = torch.device("cuda:0")
shapes = [[19, 18, 17]]
batchsizes = [1]
dtypes = [(np.float32, np.float32), (np.float16, np.float16)]
# dtypes = [(np.float32, np.float32), (np.float16, np.float16)]
# dtypes = [np.float16]
# dtypes = [(np.int8, np.int8), (np.int8, np.float32), (np.int8, np.float16)]
# dtypes = [(np.int8, np.int8)]
dtypes = [(np.int8, np.int8)]
# dtypes = [(np.float16, np.float16)]

test_case = TestCase()
# in_channels = [32]
# out_channels = [32, 48, 64]
in_channels = [32, 47]
out_channels = [32, 48, 62]
in_channels = [16]
out_channels = [16]

# in_channels = [16]
# out_channels = [16]

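
With this change the test exercises the int8 path (dtypes = [(np.int8, np.int8)]) at a channel count of 16. A sketch of how int8 test features might be generated with NumPy (illustrative only, not the test's actual generator):

```python
import numpy as np

rng = np.random.default_rng(0)
num_voxels, in_channels = 100, 16
# Small int8 values keep the s32 accumulator comfortably away from overflow.
features = rng.integers(-5, 5, size=(num_voxels, in_channels), dtype=np.int8)
print(features.dtype, features.shape)
```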
2 changes: 1 addition & 1 deletion tools/build-wheels-dev.sh
@@ -26,7 +26,7 @@ function repair_wheel {
}
gcc -v
export SPCONV_DISABLE_JIT="1"
export CUMM_CUDA_ARCH_LIST="7.5"
export CUMM_CUDA_ARCH_LIST="8.6"
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"
# Compile wheels, we only support 3.6-3.10.
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
2 changes: 1 addition & 1 deletion version.txt
@@ -1 +1 @@
2.3.2
2.3.3
