diff --git a/docs/llamacpp_ops.md b/docs/llamacpp_ops.md index 49dac2b7a..23878210a 100644 --- a/docs/llamacpp_ops.md +++ b/docs/llamacpp_ops.md @@ -20,6 +20,7 @@ | DUP | ✅ | | ELU | ✅ | | EXP | ✅ | +| GATED_LINEAR_ATTN | ✅ | | GEGLU | ✅ | | GEGLU_ERF | ✅ | | GEGLU_QUICK | ✅ | @@ -65,12 +66,12 @@ | CPY | 🟡 | | CROSS_ENTROPY_LOSS_BACK | 🟡 | | FLASH_ATTN_EXT | 🟡 | -| GATED_LINEAR_ATTN | 🟡 | | GET_ROWS | 🟡 | | GET_ROWS_BACK | 🟡 | | MUL_MAT | 🟡 | | MUL_MAT_ID | 🟡 | | OPT_STEP_ADAMW | 🟡 | +| OUT_PROD | 🟡 | | PAD | 🟡 | | POOL_2D | 🟡 | | REPEAT_BACK | 🟡 | @@ -88,7 +89,6 @@ | SSM_SCAN | 🟡 | | SUM | 🟡 | | UPSCALE | 🟡 | -| OUT_PROD | ❌ | Tips: ✅ supported, 🟡 partially supported, ❌ fail, ❓ unsupported, 🔍 unknown @@ -96,72 +96,147 @@ Tips: ✅ supported, 🟡 partially supported, ❌ fail, ❓ unsupported, 🔍 u | Operator | Previous | Current | | --- | --- | --- | -| OUT_PROD | partial (🟡) | fail (❌) | -| SSM_CONV | partial (🟡) | supported (✅) | +| GATED_LINEAR_ATTN | partial (🟡) | supported (✅) | +| OUT_PROD | fail (❌) | partial (🟡) | -#### OUT_PROD log (partial -> fail) +#### GATED_LINEAR_ATTN log (partial -> supported) ```text new_pool_for_device: device 0 use vmm pool Testing 2 devices Backend 1/2: CANN0 Device description: Ascend910B1 - Device memory: 62420 MB (62055 MB free) + Device memory: 62420 MB (62052 MB free) -/__w/Ascend-CI/Ascend-CI/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp:3723: GGML_ASSERT(dst->ne[0] == nr) failed -libggml-base.so.0(+0x151a4)[0xffff9e6951a4] -libggml-base.so.0(ggml_print_backtrace+0x21c)[0xffff9e69565c] -libggml-base.so.0(ggml_abort+0x134)[0xffff9e695824] -libggml-cann.so.0(_Z18ggml_cann_ssm_convR25ggml_backend_cann_contextP11ggml_tensor+0x414)[0xffff9e051a84] -libggml-cann.so.0(+0x26820)[0xffff9e056820] -libggml-cann.so.0(+0x27358)[0xffff9e057358] -libggml-base.so.0(ggml_backend_graph_compute+0x14)[0xffff9e6ab184] -libggml-base.so.0(ggml_backend_compare_graph_backend+0x170)[0xffff9e6afc70] -./test-backend-ops(+0x80f7c)[0xaaaac70c0f7c] -./test-backend-ops(+0x36618)[0xaaaac7076618] -./test-backend-ops(+0x16a10)[0xaaaac7056a10] -/lib/aarch64-linux-gnu/libc.so.6(+0x273fc)[0xffff9e1f73fc] -/lib/aarch64-linux-gnu/libc.so.6(__libc_start_main+0x98)[0xffff9e1f74cc] -./test-backend-ops(+0x180f0)[0xaaaac70580f0] + GATED_LINEAR_ATTN(type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1): OK + GATED_LINEAR_ATTN(type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1): OK + GATED_LINEAR_ATTN(type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4): OK + GATED_LINEAR_ATTN(type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4): OK + 4/4 tests passed + Backend CANN0: OK +Backend 2/2: CPU + Skipping CPU backend +2/2 backends passed +OK ``` -#### SSM_CONV log (partial -> supported) +#### OUT_PROD log (fail -> partial) ```text -new_pool_for_device: device 0 use vmm pool -Testing 2 devices - -Backend 1/2: CANN0 - Device description: Ascend910B1 - Device memory: 62420 MB (62051 MB free) - - SSM_CONV(type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]): OK - SSM_CONV(type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]): OK - 27/27 tests passed + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0): not supported [CANN0] + OUT_PROD(type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0): not supported [CANN0] + 64/64 tests passed Backend CANN0: OK Backend 2/2: CPU Skipping CPU backend