Skip to content

Commit 7792c30

Browse files
DongheJin, yq33victor
authored and committed
feat: support the fusion of topk and add operators in the router module of the moe model.
1 parent 3718637 commit 7792c30

File tree

3 files changed

+4
-6
lines changed

3 files changed

+4
-6
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,20 @@ if(USE_NPU)
2828
if(DEVICE_TYPE STREQUAL "USE_A3")
2929
message("downloading a3 arm xllm kernels")
3030
file(DOWNLOAD
31-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a3.arm.rpm"
31+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.7.0/xllm_kernels-1.3.2-Linux.a3.arm.rpm"
3232
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
3333
)
3434
else()
3535
if(DEVICE_ARCH STREQUAL "ARM")
3636
message("downloading a2 arm xllm_kernels")
3737
file(DOWNLOAD
38-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a2.arm.rpm"
38+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.7.0/xllm_kernels-1.3.2-Linux.a2.arm.rpm"
3939
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
4040
)
4141
else()
4242
message("downloading a2 x86 xllm_kernels")
4343
file(DOWNLOAD
44-
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.6.0/xllm_kernels-1.3.2-Linux.a2.x86.rpm"
44+
"https://9n-das-tools.s3.cn-north-1.jdcloud-oss.com/xllm-ai/xllm_kernels/0.7.0/xllm_kernels-1.3.2-Linux.a2.x86.rpm"
4545
"${CMAKE_BINARY_DIR}/xllm_kernels.rpm"
4646
)
4747
endif()

third_party/minja

Submodule minja updated from 2809705 to cf9734a

xllm/core/runtime/llm_master.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,6 @@ std::shared_ptr<Request> LLMMaster::generate_request(
300300
}
301301
}
302302

303-
// local_prompt_tokens = {151331, 151333, 151336, 198, 109377, 151337};
304-
// LOG(INFO)<<local_prompt_tokens;
305303
COUNTER_ADD(tokenization_latency_seconds, timer.elapsed_seconds());
306304

307305
int32_t max_context_len = model_args_.max_position_embeddings();

0 commit comments

Comments
 (0)