diff --git a/CMakeLists.txt b/CMakeLists.txt index caea48c5060..310a3dcfd24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,7 +225,7 @@ foreach(FILE_PATH ${EXTRA_LICENSES}) endforeach() if (LLAMA_BUILD_COMMON) - license_generate(common) + license_generate(llama-common) endif() # @@ -249,6 +249,10 @@ set_target_properties(llama install(TARGETS llama LIBRARY PUBLIC_HEADER) +if (LLAMA_BUILD_COMMON) + install(TARGETS llama-common LIBRARY) +endif() + configure_package_config_file( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index b313a7320e5..7a911c63e9d 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,9 +1,11 @@ -# common - find_package(Threads REQUIRED) llama_add_compile_flags() +# +# llama-common-base +# + # Build info header if(EXISTS "${PROJECT_SOURCE_DIR}/.git") @@ -33,17 +35,25 @@ endif() set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in") set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp") + configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) -set(TARGET build_info) -add_library(${TARGET} OBJECT ${OUTPUT_FILE}) +set(TARGET llama-common-base) +add_library(${TARGET} STATIC ${OUTPUT_FILE}) + +target_include_directories(${TARGET} PUBLIC .) + if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() -set(TARGET common) +# +# llama-common +# -add_library(${TARGET} STATIC +set(TARGET llama-common) + +add_library(${TARGET} arg.cpp arg.h base64.hpp @@ -106,17 +116,24 @@ add_library(${TARGET} STATIC jinja/caps.h ) +set_target_properties(${TARGET} PROPERTIES + VERSION ${LLAMA_INSTALL_VERSION} + SOVERSION 0 + MACHO_CURRENT_VERSION 0 # keep macOS linker from seeing oversized version number +) + target_include_directories(${TARGET} PUBLIC . ../vendor) target_compile_features (${TARGET} PUBLIC cxx_std_17) if (BUILD_SHARED_LIBS) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) + + # TODO: make fine-grained exports in the future + set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() -target_link_libraries(${TARGET} PRIVATE - build_info - cpp-httplib -) +target_link_libraries(${TARGET} PUBLIC llama-common-base) +target_link_libraries(${TARGET} PRIVATE cpp-httplib) if (LLAMA_LLGUIDANCE) include(ExternalProject) diff --git a/common/arg.cpp b/common/arg.cpp index 3d0183ed702..6f22f781915 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1,5 +1,6 @@ #include "arg.h" +#include "build-info.h" #include "chat.h" #include "common.h" #include "download.h" @@ -1044,8 +1045,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--version"}, "show version and build info", [](common_params &) { - fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); - fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); + fprintf(stderr, "version: %d (%s)\n", llama_build_number(), llama_commit()); + fprintf(stderr, "built with %s for %s\n", llama_compiler(), llama_build_target()); exit(0); } )); diff --git a/common/build-info.cpp.in b/common/build-info.cpp.in index aee9d7eafd6..f888fd079fa 100644 --- a/common/build-info.cpp.in +++ b/common/build-info.cpp.in @@ -1,4 +1,35 @@ +#include "build-info.h" + +#include +#include + int LLAMA_BUILD_NUMBER = @LLAMA_BUILD_NUMBER@; -char const *LLAMA_COMMIT = "@LLAMA_BUILD_COMMIT@"; -char const *LLAMA_COMPILER = "@BUILD_COMPILER@"; -char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@"; +char const * LLAMA_COMMIT = "@LLAMA_BUILD_COMMIT@"; +char const * LLAMA_COMPILER = "@BUILD_COMPILER@"; +char const * LLAMA_BUILD_TARGET = "@BUILD_TARGET@"; + +int llama_build_number(void) { + return LLAMA_BUILD_NUMBER; +} + +const char * llama_commit(void) { + return LLAMA_COMMIT; +} + +const char * llama_compiler(void) { + return LLAMA_COMPILER; +} + +const char * llama_build_target(void) { + return LLAMA_BUILD_TARGET; +} + +const char * llama_build_info(void) { + static std::string s = "b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT; + return s.c_str(); +} + +void llama_print_build_info(void) { + fprintf(stderr, "%s: build = %d (%s)\n", __func__, llama_build_number(), llama_commit()); + fprintf(stderr, "%s: built with %s for %s\n", __func__, llama_compiler(), llama_build_target()); +} diff --git a/common/build-info.h b/common/build-info.h new file mode 100644 index 00000000000..382cfa78500 --- /dev/null +++ b/common/build-info.h @@ -0,0 +1,11 @@ +#pragma once + +int llama_build_number(void); + +const char * llama_commit(void); +const char * llama_compiler(void); + +const char * llama_build_target(void); +const char * llama_build_info(void); + +void llama_print_build_info(void); diff --git a/common/common.cpp b/common/common.cpp index 16f78debd02..d3f1cee394c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1,6 +1,7 @@ #include "ggml.h" #include "gguf.h" +#include "build-info.h" #include "common.h" #include "log.h" #include "llama.h" @@ -372,7 +373,7 @@ void common_init() { const char * build_type = " (debug)"; #endif - LOG_DBG("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); + LOG_DBG("build: %d (%s) with %s for %s%s\n", llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); } std::string common_params_get_system_info(const common_params & params) { diff --git a/common/common.h b/common/common.h index 020b6a721ff..81c26955656 100644 --- a/common/common.h +++ b/common/common.h @@ -2,9 +2,10 @@ #pragma once +#include "llama-cpp.h" + #include "ggml-opt.h" #include "ggml.h" -#include "llama-cpp.h" #include #include @@ -27,11 +28,6 @@ #define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) #define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) -#define print_build_info() do { \ - fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \ - fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ -} while(0) - struct common_time_meas { common_time_meas(int64_t & t_acc, bool disable = false); ~common_time_meas(); @@ -53,14 +49,6 @@ struct common_adapter_lora_info { using llama_tokens = std::vector; -// build info -extern int LLAMA_BUILD_NUMBER; -extern const char * LLAMA_COMMIT; -extern const char * LLAMA_COMPILER; -extern const char * LLAMA_BUILD_TARGET; - -const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT); - struct common_control_vector_load_info; // diff --git a/common/download.cpp b/common/download.cpp index 0e0034e1da3..c4bb02d90c2 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -1,5 +1,6 @@ #include "arg.h" +#include "build-info.h" #include "common.h" #include "log.h" #include "download.h" @@ -303,7 +304,7 @@ static int common_download_file_single_online(const std::string & url, headers.emplace(h.first, h.second); } if (headers.find("User-Agent") == headers.end()) { - headers.emplace("User-Agent", "llama-cpp/" + build_info); + headers.emplace("User-Agent", "llama-cpp/" + std::string(llama_build_info())); } if (!opts.bearer_token.empty()) { headers.emplace("Authorization", "Bearer " + opts.bearer_token); @@ -441,7 +442,7 @@ std::pair> common_remote_get_content(const std::string headers.emplace(h.first, h.second); } if (headers.find("User-Agent") == headers.end()) { - headers.emplace("User-Agent", "llama-cpp/" + build_info); + headers.emplace("User-Agent", "llama-cpp/" + std::string(llama_build_info())); } if (params.timeout > 0) { diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp index 665c9ff066a..38a4c17a98e 100644 --- a/common/hf-cache.cpp +++ b/common/hf-cache.cpp @@ -1,5 +1,6 @@ #include "hf-cache.h" +#include "build-info.h" #include "common.h" #include "log.h" #include "http.h" @@ -200,7 +201,7 @@ static nl::json api_get(const std::string & url, auto [cli, parts] = common_http_client(url); httplib::Headers headers = { - {"User-Agent", "llama-cpp/" + build_info}, + {"User-Agent", "llama-cpp/" + std::string(llama_build_info())}, {"Accept", "application/json"} }; diff --git a/common/log.cpp b/common/log.cpp index b17d2b62c35..dec4ef5fc70 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -23,6 +23,10 @@ int common_log_verbosity_thold = LOG_DEFAULT_LLAMA; +int common_log_get_verbosity_thold(void) { + return common_log_verbosity_thold; +} + void common_log_set_verbosity_thold(int verbosity) { common_log_verbosity_thold = verbosity; } diff --git a/common/log.h b/common/log.h index f0f8471b5f4..cf32ca185ca 100644 --- a/common/log.h +++ b/common/log.h @@ -38,7 +38,7 @@ enum log_colors { // needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower // set via common_log_set_verbosity() -extern int common_log_verbosity_thold; +int common_log_get_verbosity_thold(void); void common_log_set_verbosity_thold(int verbosity); // not thread-safe @@ -98,7 +98,7 @@ void common_log_flush (struct common_log * log); // f #define LOG_TMPL(level, verbosity, ...) \ do { \ - if ((verbosity) <= common_log_verbosity_thold) { \ + if ((verbosity) <= common_log_get_verbosity_thold()) { \ common_log_add(common_log_main(), (level), __VA_ARGS__); \ } \ } while (0) diff --git a/examples/batched/CMakeLists.txt b/examples/batched/CMakeLists.txt index 0d439f49842..1d7c2a0f6e7 100644 --- a/examples/batched/CMakeLists.txt +++ b/examples/batched/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-batched) add_executable(${TARGET} batched.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/convert-llama2c-to-ggml/CMakeLists.txt b/examples/convert-llama2c-to-ggml/CMakeLists.txt index 44e5f722a97..2162da4fdf7 100644 --- a/examples/convert-llama2c-to-ggml/CMakeLists.txt +++ b/examples/convert-llama2c-to-ggml/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-convert-llama2c-to-ggml) add_executable(${TARGET} convert-llama2c-to-ggml.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/debug/CMakeLists.txt b/examples/debug/CMakeLists.txt index 34593072be2..fb1c7e25814 100644 --- a/examples/debug/CMakeLists.txt +++ b/examples/debug/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-debug) add_executable(${TARGET} debug.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/diffusion/CMakeLists.txt b/examples/diffusion/CMakeLists.txt index 396549c8029..70228d4079b 100644 --- a/examples/diffusion/CMakeLists.txt +++ b/examples/diffusion/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-diffusion-cli) add_executable(${TARGET} diffusion-cli.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama llama-common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/embedding/CMakeLists.txt b/examples/embedding/CMakeLists.txt index 809040307d2..0634c7bd820 100644 --- a/examples/embedding/CMakeLists.txt +++ b/examples/embedding/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-embedding) add_executable(${TARGET} embedding.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/eval-callback/CMakeLists.txt b/examples/eval-callback/CMakeLists.txt index 6439690a519..63fbe59dce8 100644 --- a/examples/eval-callback/CMakeLists.txt +++ b/examples/eval-callback/CMakeLists.txt @@ -1,7 +1,7 @@ set(TARGET llama-eval-callback) add_executable(${TARGET} eval-callback.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_BUILD_TESTS) diff --git a/examples/gen-docs/CMakeLists.txt b/examples/gen-docs/CMakeLists.txt index 25de0af35df..aa68cbd78a8 100644 --- a/examples/gen-docs/CMakeLists.txt +++ b/examples/gen-docs/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-gen-docs) add_executable(${TARGET} gen-docs.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/idle/CMakeLists.txt b/examples/idle/CMakeLists.txt index d5018fec4b7..c0fedbbff5b 100644 --- a/examples/idle/CMakeLists.txt +++ b/examples/idle/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-idle) add_executable(${TARGET} idle.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama llama-common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/lookahead/CMakeLists.txt b/examples/lookahead/CMakeLists.txt index 3468613142d..5d6e604fa98 100644 --- a/examples/lookahead/CMakeLists.txt +++ b/examples/lookahead/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-lookahead) add_executable(${TARGET} lookahead.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/lookup/CMakeLists.txt b/examples/lookup/CMakeLists.txt index fba78ceda6f..09f7d2e3c92 100644 --- a/examples/lookup/CMakeLists.txt +++ b/examples/lookup/CMakeLists.txt @@ -1,23 +1,23 @@ set(TARGET llama-lookup) add_executable(${TARGET} lookup.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TARGET llama-lookup-create) add_executable(${TARGET} lookup-create.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TARGET llama-lookup-merge) add_executable(${TARGET} lookup-merge.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TARGET llama-lookup-stats) add_executable(${TARGET} lookup-stats.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/parallel/CMakeLists.txt b/examples/parallel/CMakeLists.txt index 847e916de6e..4fb7a96aae3 100644 --- a/examples/parallel/CMakeLists.txt +++ b/examples/parallel/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-parallel) add_executable(${TARGET} parallel.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/passkey/CMakeLists.txt b/examples/passkey/CMakeLists.txt index 9bc5110c293..12558cc2557 100644 --- a/examples/passkey/CMakeLists.txt +++ b/examples/passkey/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-passkey) add_executable(${TARGET} passkey.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/retrieval/CMakeLists.txt b/examples/retrieval/CMakeLists.txt index 512a602ec04..5927ff8a852 100644 --- a/examples/retrieval/CMakeLists.txt +++ b/examples/retrieval/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-retrieval) add_executable(${TARGET} retrieval.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/save-load-state/CMakeLists.txt b/examples/save-load-state/CMakeLists.txt index 0f50e50deec..78024672e77 100644 --- a/examples/save-load-state/CMakeLists.txt +++ b/examples/save-load-state/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-save-load-state) add_executable(${TARGET} save-load-state.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/speculative-simple/CMakeLists.txt b/examples/speculative-simple/CMakeLists.txt index aeaea74fcd1..5ef3b4131f2 100644 --- a/examples/speculative-simple/CMakeLists.txt +++ b/examples/speculative-simple/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-speculative-simple) add_executable(${TARGET} speculative-simple.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/speculative/CMakeLists.txt b/examples/speculative/CMakeLists.txt index c84196bd95b..b4e20c717a2 100644 --- a/examples/speculative/CMakeLists.txt +++ b/examples/speculative/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-speculative) add_executable(${TARGET} speculative.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/sycl/CMakeLists.txt b/examples/sycl/CMakeLists.txt index e4d5083e6e5..40e44eefc8a 100644 --- a/examples/sycl/CMakeLists.txt +++ b/examples/sycl/CMakeLists.txt @@ -5,5 +5,5 @@ set(TARGET llama-ls-sycl-device) add_executable(${TARGET} ls-sycl-device.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/training/CMakeLists.txt b/examples/training/CMakeLists.txt index 64afe6ddc64..8bb20d0f213 100644 --- a/examples/training/CMakeLists.txt +++ b/examples/training/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-finetune) add_executable(${TARGET} finetune.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index b27fbb13a3a..8bc7ae65a6d 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -5116,115 +5116,8 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer, GGML_ASSERT(tensor->ne[2] == 1); GGML_ASSERT(tensor->ne[3] == 1); - // Transpose weights - size_t q_size_bytes = K * M / 4 * sizeof(float); - cl_buffer_region region; - region.origin = 0; - region.size = q_size_bytes; - cl_mem qT_d = clCreateSubBuffer( - backend_ctx->prealloc_quant_trans.buffer, - 0, - CL_BUFFER_CREATE_TYPE_REGION, - ®ion, - &err); - CL_CHECK(err); - - cl_mem q_d_image1D; - cl_mem qT_d_image1D; - - cl_image_format img_fmt_1d; - cl_image_desc img_desc_1d; - - img_fmt_1d = { CL_RGBA, CL_FLOAT }; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * K / 4 / 4; - img_desc_1d.buffer = extra->q; - q_d_image1D = clCreateImage(context, 0, &img_fmt_1d, &img_desc_1d, NULL, &err); - CL_CHECK(err); - - img_fmt_1d = { CL_RGBA, CL_FLOAT }; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * K / 4 / 4; - img_desc_1d.buffer = qT_d; - qT_d_image1D = clCreateImage(context, 0, &img_fmt_1d, &img_desc_1d, NULL, &err); - CL_CHECK(err); - - int height_q = M / 4; - int width_q = K / 4 / 4; - kernel = backend_ctx->kernel_transpose_32; - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &q_d_image1D)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &qT_d_image1D)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), &height_q)); - CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), &width_q)); - - size_t local_size_q[3] = {4, 16, 1}; - size_t global_size_q[3] = {static_cast(width_q), static_cast(height_q), 1}; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_size_q, local_size_q, 0, NULL, &evt)); - CL_CHECK(clWaitForEvents(1, &evt)); - - // Transpose scales - size_t d_size_bytes = M * (K / 32) * 2; - region.origin = 0; - region.size = d_size_bytes; - cl_mem dT_d = clCreateSubBuffer( - backend_ctx->prealloc_scales_trans.buffer, - 0, - CL_BUFFER_CREATE_TYPE_REGION, - ®ion, - &err); - CL_CHECK(err); - - cl_mem d_d_image1D; - cl_mem dT_d_image1D; - - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_fmt_1d = { CL_R, CL_HALF_FLOAT }; - img_desc_1d.image_width = M * K / 32; - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.buffer = extra->d; - d_d_image1D = clCreateImage(context, 0, &img_fmt_1d, &img_desc_1d, NULL, &err); - CL_CHECK(err); - - img_fmt_1d = { CL_RGBA, CL_HALF_FLOAT }; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * K / 32 / 4; - img_desc_1d.buffer = dT_d; - dT_d_image1D = clCreateImage(context, 0, &img_fmt_1d, &img_desc_1d, NULL, &err); - CL_CHECK(err); - - int height_s = M / 4; - int width_s = K / 32; - - kernel = backend_ctx->kernel_transpose_16_4x1; - - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_d_image1D)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &dT_d_image1D)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), &height_s)); - CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), &width_s)); - - size_t local_size_s[3] = {4, 16, 1}; - size_t global_size_s[3] = {static_cast(width_s), static_cast(height_s), 1}; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_size_s, local_size_s, 0, NULL, &evt)); - CL_CHECK(clWaitForEvents(1, &evt)); - - // copy transposed buffer contents to original buffers - CL_CHECK(clEnqueueCopyBuffer(queue, qT_d, extra->q, 0, 0, q_size_bytes, 0, NULL, &evt)); - CL_CHECK(clWaitForEvents(1, &evt)); - - CL_CHECK(clEnqueueCopyBuffer(queue, dT_d, extra->d, 0, 0, d_size_bytes, 0, NULL, &evt)); - CL_CHECK(clWaitForEvents(1, &evt)); - - CL_CHECK(clReleaseMemObject(qT_d)); - CL_CHECK(clReleaseMemObject(dT_d)); - - CL_CHECK(clReleaseMemObject(q_d_image1D)); - CL_CHECK(clReleaseMemObject(d_d_image1D)); - CL_CHECK(clReleaseMemObject(qT_d_image1D)); - CL_CHECK(clReleaseMemObject(dT_d_image1D)); + transpose_2d_as_32b(backend_ctx, extra->q, extra->q, size_q, K/4, M); + transpose_2d_as_16b(backend_ctx, extra->d, extra->d, size_d, K/32, M); } // end transpose #endif // GGML_OPENCL_USE_ADRENO_KERNELS @@ -9956,19 +9849,18 @@ static void ggml_cl_mul_mat_q8_0_f32_adreno(ggml_backend_t backend, const ggml_t GGML_ASSERT(dst); GGML_ASSERT(dst->extra); - const enum ggml_type src0t = src0->type; - const enum ggml_type src1t = src1->type; - - GGML_ASSERT(src0t == GGML_TYPE_Q8_0); - GGML_ASSERT(src1t == GGML_TYPE_F32); + GGML_ASSERT(src0->type == GGML_TYPE_Q8_0); + GGML_ASSERT(src1->type == GGML_TYPE_F32); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; - ggml_tensor_extra_cl_q8_0 * extra0_q8_0 = (ggml_tensor_extra_cl_q8_0 *)src0->extra; + cl_ulong offset1 = extra1->offset + src1->view_offs; + cl_ulong offsetd = extrad->offset + dst->view_offs; + GGML_ASSERT(src1->view_offs == 0); GGML_ASSERT(dst->view_offs == 0); @@ -9989,148 +9881,112 @@ static void ggml_cl_mul_mat_q8_0_f32_adreno(ggml_backend_t backend, const ggml_t cl_context context = backend_ctx->context; cl_kernel kernel; - // init CL objects - cl_int status; - cl_image_format img_fmt_1d; - cl_image_desc img_desc_1d; + cl_int err; + cl_image_format img_fmt; + cl_image_desc img_desc; cl_buffer_region region; - cl_mem A_image1d; - cl_mem B_image1d; - cl_mem B_sub_buffer; - cl_mem S_image1d; - // for B transpose - cl_mem B_image1d_trans = nullptr; - cl_mem B_d = nullptr; - - cl_mem D_image1d; - cl_mem D_sub_buffer; int M = ne01; int N = ne1; int K = ne00; - // create an image for A - img_fmt_1d = { CL_R, CL_FLOAT}; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * K / 4; // Divide by 4 for char -> float - img_desc_1d.buffer = extra0_q8_0->q; - A_image1d = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt_1d, &img_desc_1d, NULL, &status); - CL_CHECK(status); - - // create an image for Scale - img_fmt_1d = { CL_R, CL_HALF_FLOAT}; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * K / 32; // Block size is 32 - img_desc_1d.buffer = extra0_q8_0->d; - S_image1d = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt_1d, &img_desc_1d, NULL, &status); - CL_CHECK(status); - - // create a sub_buffer for B - region.origin = (extra1->offset); // + src1->view_offs); - region.size = K * N * sizeof(float); - B_sub_buffer = clCreateSubBuffer((extra1->data_device), 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); - CL_CHECK(status); - - // create an image for B from sub_buffer: RGBA (OCL) - img_fmt_1d = {CL_RGBA, CL_FLOAT}; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = K * N / 4; - img_desc_1d.buffer = B_sub_buffer; - B_image1d = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt_1d, &img_desc_1d, NULL, &status); - CL_CHECK(status); + if (ne1 == 1) { + cl_mem q_img = nullptr; + cl_mem b_sub_buf = nullptr; + cl_mem b_img = nullptr; - // Create subbuffer and image1d_buffer for dst - region.origin = (extrad->offset); // + dst->view_offs; - region.size = M * N * sizeof(float); - D_sub_buffer = clCreateSubBuffer((extrad->data_device), 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &status); - CL_CHECK(status); + // image for q + img_fmt = { CL_R, CL_UNSIGNED_INT32}; + memset(&img_desc, 0, sizeof(img_desc)); + img_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + img_desc.image_width = M * K / 4; + img_desc.buffer = extra0_q8_0->q; + CL_CHECK((q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt, &img_desc, NULL, &err), err)); - img_fmt_1d = {CL_R, CL_FLOAT}; - memset(&img_desc_1d, 0, sizeof(img_desc_1d)); - img_desc_1d.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - img_desc_1d.image_width = M * N; - img_desc_1d.buffer = D_sub_buffer; - D_image1d = clCreateImage(context, CL_MEM_WRITE_ONLY, &img_fmt_1d, &img_desc_1d, NULL, &status); - CL_CHECK(status); + // create a sub_buffer for B + region.origin = offset1; + region.size = K * N * sizeof(float); + CL_CHECK((b_sub_buf = clCreateSubBuffer((extra1->data_device), 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err), err)); - size_t local_work_size[3] = {1, 1, 1}; - size_t global_work_size[3] = {1, 1, 1}; + // image for activations + img_fmt = {CL_RGBA, CL_FLOAT}; + memset(&img_desc, 0, sizeof(img_desc)); + img_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + img_desc.image_width = K * N / 4; + img_desc.buffer = b_sub_buf; + CL_CHECK((b_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt, &img_desc, NULL, &err), err)); - if (N == 1) { kernel = backend_ctx->CL_mul_mat_vec_q8_0_f32; int r2 = 1; int r3 = 1; - cl_uint k_arg = 0; - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_mem), &A_image1d)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_mem), &extra0_q8_0->d)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_mem), &B_image1d)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_ulong), &extra1->offset)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_mem), &extrad->data_device)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(cl_ulong), &extrad->offset)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne00)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne01)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne02)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne10)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne12)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne0)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &ne1)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &r2)); - CL_CHECK(clSetKernelArg(kernel, k_arg++, sizeof(int), &r3)); + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &q_img)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q8_0->d)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &b_img)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &extra1->offset)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &extrad->offset)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne10)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &r2)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &r3)); size_t wavesize = backend_ctx->adreno_wave_size; - local_work_size[0] = wavesize; - local_work_size[1] = 4; // reduce factor - local_work_size[2] = 1; + size_t local_work_size[] = { wavesize, 4, 1 }; + size_t global_work_size[] = { CEIL_DIV(M, wavesize)*wavesize, 4, 1 }; - global_work_size[0] = ((M + wavesize - 1) / wavesize) * wavesize; - global_work_size[1] = 4; // reduce factor - global_work_size[2] = 1; + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); + + CL_CHECK(clReleaseMemObject(q_img)); + CL_CHECK(clReleaseMemObject(b_img)); + CL_CHECK(clReleaseMemObject(b_sub_buf)); } else { - cl_ulong offsetd = extrad->offset + dst->view_offs; - int padding; + cl_mem b_sub_buf = nullptr; + cl_mem b_sub_buf_trans = nullptr; + cl_mem b_img = nullptr; + cl_mem b_img_trans = nullptr; - //how many extra elements beyond multiple of 8 - int extra_elements = N % 8; + // subbuffer for activations + region.origin = offset1; + region.size = K * N * sizeof(float); + CL_CHECK((b_sub_buf = clCreateSubBuffer(extra1->data_device, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err), err)); + + // image for activations + img_fmt = {CL_RGBA, CL_FLOAT}; + memset(&img_desc, 0, sizeof(img_desc)); + img_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + img_desc.image_width = K * N / 4; + img_desc.buffer = b_sub_buf; + CL_CHECK((b_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_fmt, &img_desc, NULL, &err), err)); - //how much padding to add - padding = 0; + // pad N to multiple of 8 + int extra_elements = N % 8; + int padding = 0; if (extra_elements > 0){ padding = 8 - extra_elements; } - // Specify the starting offset (in bytes) + // subbuffer for transposed activations region.origin = 0; - // Specify the size of the sub-buffer (divide by 2 for FP16) region.size = K * (N + padding) * sizeof(float)/2; backend_ctx->prealloc_act_trans.allocate(context, region.size); - B_d = clCreateSubBuffer( - backend_ctx->prealloc_act_trans.buffer, - 0, - CL_BUFFER_CREATE_TYPE_REGION, - ®ion, - &status); - CL_CHECK(status); + CL_CHECK((b_sub_buf_trans = clCreateSubBuffer(backend_ctx->prealloc_act_trans.buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err), err)); - cl_image_format image_format_B_d_output = { CL_RGBA, CL_HALF_FLOAT }; //(CL_HALF_FLOAT for FP16) - cl_image_desc image_desc_B_d_output = { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - static_cast(K * (N + padding)/4), - 0, 0, 0, 0, 0, 0, 0, { B_d } - }; - B_image1d_trans = clCreateImage( - context, - 0, - &image_format_B_d_output, - &image_desc_B_d_output, - NULL, - &status); - CL_CHECK(status); + // image for transposed activations + img_fmt = {CL_RGBA, CL_HALF_FLOAT}; + memset(&img_desc, 0, sizeof(img_desc)); + img_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + img_desc.image_width = K * (N + padding) / 4; + img_desc.buffer = b_sub_buf_trans; + CL_CHECK((b_img_trans = clCreateImage(context, 0, &img_fmt, &img_desc, NULL, &err), err)); + // transpose activations int height_B = N/4; if (height_B == 0) { height_B = 1; @@ -10139,58 +9995,39 @@ static void ggml_cl_mul_mat_q8_0_f32_adreno(ggml_backend_t backend, const ggml_t int padded_height_B = (N + padding)/4; kernel = backend_ctx->kernel_transpose_32_16; - CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &B_image1d)); - CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &B_image1d_trans)); + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &b_img)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_img_trans)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), &height_B)); CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), &width_B)); CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &padded_height_B)); - size_t local_size_t[2] = { 1, 16 }; - size_t global_size_t[2] = { - static_cast(width_B), - static_cast(padded_height_B) - }; - - backend_ctx->enqueue_ndrange_kernel(kernel, 2, global_size_t, local_size_t, dst); + size_t local_work_size_t[2] = { 1, 16 }; + size_t global_work_size_t[2] = { (size_t)width_B, (size_t)padded_height_B }; + backend_ctx->enqueue_ndrange_kernel(kernel, 2, global_work_size_t, local_work_size_t, dst); + // gemm kernel = backend_ctx->kernel_mul_mm_q8_0_f32_8x4; - - int N_with_padding = N + padding; + int padded_N = N + padding; CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_q8_0->q)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q8_0->d)); - CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &B_image1d_trans)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &b_img_trans)); CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extrad->data_device)); CL_CHECK(clSetKernelArg(kernel, 4, sizeof(int), &K)); CL_CHECK(clSetKernelArg(kernel, 5, sizeof(int), &M)); - CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &N_with_padding)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &padded_N)); CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &N)); CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &offsetd)); - global_work_size[0] = (size_t)(N + 7) / 8; - global_work_size[1] = (size_t)(M + 3) / 4; - global_work_size[2] = 1; - - local_work_size[0] = 2; - local_work_size[1] = 128; - local_work_size[2] = 1; - } + size_t global_work_size[] = { (size_t)CEIL_DIV(N, 8), (size_t)CEIL_DIV(M, 4), 1 }; + size_t local_work_size[] = { 2, 128, 1 }; - // enqueue kernel with profiling - backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); - // deallocate sub buffers and images - CL_CHECK(clReleaseMemObject(A_image1d)); - CL_CHECK(clReleaseMemObject(B_sub_buffer)); - CL_CHECK(clReleaseMemObject(B_image1d)); - CL_CHECK(clReleaseMemObject(S_image1d)); - CL_CHECK(clReleaseMemObject(D_sub_buffer)); - CL_CHECK(clReleaseMemObject(D_image1d)); - if (B_image1d_trans) { - CL_CHECK(clReleaseMemObject(B_image1d_trans)); - } - if (B_d) { - CL_CHECK(clReleaseMemObject(B_d)); + CL_CHECK(clReleaseMemObject(b_img_trans)); + CL_CHECK(clReleaseMemObject(b_sub_buf_trans)); + CL_CHECK(clReleaseMemObject(b_img)); + CL_CHECK(clReleaseMemObject(b_sub_buf)); } #else GGML_UNUSED(backend); diff --git a/pocs/vdot/CMakeLists.txt b/pocs/vdot/CMakeLists.txt index 6235aec1fda..f3776268ab6 100644 --- a/pocs/vdot/CMakeLists.txt +++ b/pocs/vdot/CMakeLists.txt @@ -1,9 +1,9 @@ set(TARGET llama-vdot) add_executable(${TARGET} vdot.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) set(TARGET llama-q8dot) add_executable(${TARGET} q8dot.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index edbaf52a2f8..d9781d7d275 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -432,6 +432,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_26B: return "26B"; case LLM_TYPE_27B: return "27B"; case LLM_TYPE_30B: return "30B"; + case LLM_TYPE_31B: return "31B"; case LLM_TYPE_32B: return "32B"; case LLM_TYPE_34B: return "34B"; case LLM_TYPE_35B: return "35B"; @@ -466,6 +467,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_16B_A1B: return "16B.A1B"; case LLM_TYPE_21B_A3B: return "21B.A3B"; case LLM_TYPE_24B_A2B: return "24B.A2B"; + case LLM_TYPE_26B_A4B: return "26B.A4B"; case LLM_TYPE_30B_A3B: return "30B.A3B"; case LLM_TYPE_31B_A3_5B: return "31B.A3.5B"; case LLM_TYPE_35B_A3B: return "35B.A3B"; @@ -1624,8 +1626,10 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping, false); switch (hparams.n_layer) { + case 30: type = LLM_TYPE_26B_A4B; break; case 35: type = LLM_TYPE_E2B; break; - case 42: type = LLM_TYPE_E4B; break; // to confirm: E4B or E5B? + case 42: type = LLM_TYPE_E4B; break; + case 60: type = LLM_TYPE_31B; break; default: type = LLM_TYPE_UNKNOWN; } } break; diff --git a/src/llama-model.h b/src/llama-model.h index bba70012e11..67349e2d6ff 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -84,6 +84,7 @@ enum llm_type { LLM_TYPE_26B, LLM_TYPE_27B, LLM_TYPE_30B, + LLM_TYPE_31B, LLM_TYPE_32B, LLM_TYPE_34B, LLM_TYPE_35B, @@ -118,6 +119,7 @@ enum llm_type { LLM_TYPE_16B_A1B, LLM_TYPE_21B_A3B, // Ernie MoE small LLM_TYPE_24B_A2B, // lfm2moe + LLM_TYPE_26B_A4B, // Gemma4 LLM_TYPE_30B_A3B, LLM_TYPE_31B_A3_5B, LLM_TYPE_35B_A3B, // Qwen3.5 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cd4bc5ef1d3..b282c3239f0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,7 +10,7 @@ function(llama_build source) endif() add_executable(${TEST_TARGET} ${TEST_SOURCES}) - target_link_libraries(${TEST_TARGET} PRIVATE common) + target_link_libraries(${TEST_TARGET} PRIVATE llama llama-common) if (LLAMA_TESTS_INSTALL) install(TARGETS ${TEST_TARGET} RUNTIME) endif() @@ -105,7 +105,7 @@ function(llama_build_and_test source) if (LLAMA_TESTS_INSTALL) install(TARGETS ${TEST_TARGET} RUNTIME) endif() - target_link_libraries(${TEST_TARGET} PRIVATE common) + target_link_libraries(${TEST_TARGET} PRIVATE llama-common) add_test( NAME ${TEST_TARGET} @@ -269,11 +269,11 @@ if (TARGET cpp-httplib) get_target_property(_cpp_httplib_defs cpp-httplib INTERFACE_COMPILE_DEFINITIONS) if (_cpp_httplib_defs MATCHES "CPPHTTPLIB_OPENSSL_SUPPORT") add_library(gguf-model-data STATIC gguf-model-data.cpp) - target_link_libraries(gguf-model-data PRIVATE common cpp-httplib) + target_link_libraries(gguf-model-data PRIVATE llama-common cpp-httplib) target_include_directories(gguf-model-data PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) add_executable(test-gguf-model-data test-gguf-model-data.cpp) - target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common) + target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data llama-common) llama_test(test-gguf-model-data LABEL "model") # test-quant-type-selection requires gguf-model-data for remote model metadata diff --git a/tests/test-quantize-stats.cpp b/tests/test-quantize-stats.cpp index de587d456d0..e53a7b35531 100644 --- a/tests/test-quantize-stats.cpp +++ b/tests/test-quantize-stats.cpp @@ -1,10 +1,13 @@ -#include "ggml.h" -#include "ggml-cpu.h" #include "llama.h" + +#include "build-info.h" #include "common.h" #include "../src/llama-model.h" +#include "ggml.h" +#include "ggml-cpu.h" + #include #include #include @@ -298,7 +301,7 @@ int main(int argc, char ** argv) { return 1; } - print_build_info(); + llama_print_build_info(); // load the model fprintf(stderr, "Loading model\n"); diff --git a/tools/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt index 4a46b57a528..f9ffd2d4ce7 100644 --- a/tools/batched-bench/CMakeLists.txt +++ b/tools/batched-bench/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-batched-bench) add_executable(${TARGET} batched-bench.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/cli/CMakeLists.txt b/tools/cli/CMakeLists.txt index b08fff4c289..7e01abb81b9 100644 --- a/tools/cli/CMakeLists.txt +++ b/tools/cli/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-cli) add_executable(${TARGET} cli.cpp) -target_link_libraries(${TARGET} PRIVATE server-context PUBLIC common ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE server-context PUBLIC llama-common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) include_directories(../server) diff --git a/tools/completion/CMakeLists.txt b/tools/completion/CMakeLists.txt index 126ae6ab3d0..2c7df80652c 100644 --- a/tools/completion/CMakeLists.txt +++ b/tools/completion/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-completion) add_executable(${TARGET} completion.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/cvector-generator/CMakeLists.txt b/tools/cvector-generator/CMakeLists.txt index baeb4d00c14..c0f2c240705 100644 --- a/tools/cvector-generator/CMakeLists.txt +++ b/tools/cvector-generator/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-cvector-generator) add_executable(${TARGET} cvector-generator.cpp pca.hpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/cvector-generator/cvector-generator.cpp b/tools/cvector-generator/cvector-generator.cpp index fd6e5ddd2d8..8c6b3d868d2 100644 --- a/tools/cvector-generator/cvector-generator.cpp +++ b/tools/cvector-generator/cvector-generator.cpp @@ -2,6 +2,7 @@ #include "gguf.h" #include "arg.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "pca.hpp" @@ -420,7 +421,7 @@ int main(int argc, char ** argv) { params.cb_eval_user_data = &cb_data; params.warmup = false; - print_build_info(); + llama_print_build_info(); llama_backend_init(); llama_numa_init(params.numa); diff --git a/tools/export-lora/CMakeLists.txt b/tools/export-lora/CMakeLists.txt index cddfa77f02b..b122a875230 100644 --- a/tools/export-lora/CMakeLists.txt +++ b/tools/export-lora/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-export-lora) add_executable(${TARGET} export-lora.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/fit-params/CMakeLists.txt b/tools/fit-params/CMakeLists.txt index 34c3373f83c..25c40966333 100644 --- a/tools/fit-params/CMakeLists.txt +++ b/tools/fit-params/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-fit-params) add_executable(${TARGET} fit-params.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/gguf-split/CMakeLists.txt b/tools/gguf-split/CMakeLists.txt index 9b2125087c5..b40e07ab5aa 100644 --- a/tools/gguf-split/CMakeLists.txt +++ b/tools/gguf-split/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-gguf-split) add_executable(${TARGET} gguf-split.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/gguf-split/gguf-split.cpp b/tools/gguf-split/gguf-split.cpp index f99f0299b9c..8a6b5c198b2 100644 --- a/tools/gguf-split/gguf-split.cpp +++ b/tools/gguf-split/gguf-split.cpp @@ -1,8 +1,11 @@ -#include "ggml.h" -#include "gguf.h" #include "llama.h" + +#include "build-info.h" #include "common.h" +#include "ggml.h" +#include "gguf.h" + #include #include #include @@ -101,8 +104,8 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p split_print_usage(argv[0]); exit(0); } else if (arg == "--version") { - fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); - fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); + fprintf(stderr, "version: %d (%s)\n", llama_build_number(), llama_commit()); + fprintf(stderr, "built with %s for %s\n", llama_compiler(), llama_build_target()); exit(0); } else if (arg == "--dry-run") { arg_found = true; diff --git a/tools/imatrix/CMakeLists.txt b/tools/imatrix/CMakeLists.txt index 5af6263f985..361c4577d85 100644 --- a/tools/imatrix/CMakeLists.txt +++ b/tools/imatrix/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-imatrix) add_executable(${TARGET} imatrix.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/llama-bench/CMakeLists.txt b/tools/llama-bench/CMakeLists.txt index b8543a9692f..93d6a3aa2e7 100644 --- a/tools/llama-bench/CMakeLists.txt +++ b/tools/llama-bench/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-bench) add_executable(${TARGET} llama-bench.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index b15a26a987b..59920ab516b 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -19,6 +19,7 @@ #include #include +#include "build-info.h" #include "common.h" #include "download.h" #include "ggml.h" @@ -1624,8 +1625,8 @@ struct test { } }; -const std::string test::build_commit = LLAMA_COMMIT; -const int test::build_number = LLAMA_BUILD_NUMBER; +const std::string test::build_commit = llama_commit(); +const int test::build_number = llama_build_number(); struct printer { virtual ~printer() {} diff --git a/tools/mtmd/CMakeLists.txt b/tools/mtmd/CMakeLists.txt index 3bafde178de..e5ad9b81b82 100644 --- a/tools/mtmd/CMakeLists.txt +++ b/tools/mtmd/CMakeLists.txt @@ -86,12 +86,12 @@ if (TARGET BUILD_INFO) add_dependencies(mtmd-helper BUILD_INFO) endif() -# if mtmd is linked against common, we throw an error +# if mtmd is linked against llama-common, we throw an error if (TARGET mtmd) get_target_property(libs mtmd LINK_LIBRARIES) - if (libs AND "common" IN_LIST libs) + if (libs AND "llama-common" IN_LIST libs) message(FATAL_ERROR "mtmd is designed to be a public library.\n" - "It must not link against common") + "It must not link against llama-common") endif() endif() @@ -106,11 +106,11 @@ set_target_properties (${TARGET} PROPERTIES OUTPUT_NAME llama-mtmd-cli) if(LLAMA_TOOLS_INSTALL) install(TARGETS ${TARGET} RUNTIME) endif() -target_link_libraries (${TARGET} PRIVATE common mtmd Threads::Threads) +target_link_libraries (${TARGET} PRIVATE llama-common mtmd Threads::Threads) target_compile_features(${TARGET} PRIVATE cxx_std_17) # mtmd-debug tool add_executable(llama-mtmd-debug debug/mtmd-debug.cpp) set_target_properties(llama-mtmd-debug PROPERTIES OUTPUT_NAME llama-mtmd-debug) -target_link_libraries(llama-mtmd-debug PRIVATE common mtmd Threads::Threads) +target_link_libraries(llama-mtmd-debug PRIVATE llama-common mtmd Threads::Threads) target_compile_features(llama-mtmd-debug PRIVATE cxx_std_17) diff --git a/tools/parser/CMakeLists.txt b/tools/parser/CMakeLists.txt index 55e0c634375..a8df0e7e6e3 100644 --- a/tools/parser/CMakeLists.txt +++ b/tools/parser/CMakeLists.txt @@ -2,7 +2,7 @@ if (NOT WIN32 OR NOT BUILD_SHARED_LIBS) # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API set(TARGET llama-debug-template-parser) add_executable(${TARGET} debug-template-parser.cpp) - target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) @@ -12,7 +12,7 @@ endif() set(TARGET llama-template-analysis) add_executable(${TARGET} template-analysis.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt index 12b28b2be43..0c194ee7f08 100644 --- a/tools/perplexity/CMakeLists.txt +++ b/tools/perplexity/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-perplexity) add_executable(${TARGET} perplexity.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt index bd9ddbd67da..965adc0059b 100644 --- a/tools/quantize/CMakeLists.txt +++ b/tools/quantize/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-quantize) add_executable(${TARGET} quantize.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${TARGET} PRIVATE ../../common) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index a882c78f1bd..3d33d47d98b 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -1,5 +1,8 @@ -#include "common.h" #include "llama.h" + +#include "build-info.h" +#include "common.h" + #include "gguf.h" #include @@ -709,7 +712,7 @@ int main(int argc, char ** argv) { } } - print_build_info(); + llama_print_build_info(); if (params.dry_run) { fprintf(stderr, "%s: calculating quantization size for '%s' as %s", __func__, fname_inp.c_str(), ftype_str.c_str()); diff --git a/tools/results/CMakeLists.txt b/tools/results/CMakeLists.txt index 2843b8488a1..643eb029277 100644 --- a/tools/results/CMakeLists.txt +++ b/tools/results/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-results) add_executable(${TARGET} results.cpp) -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt index 451a045fe0d..0cce99f5968 100644 --- a/tools/server/CMakeLists.txt +++ b/tools/server/CMakeLists.txt @@ -23,7 +23,7 @@ endif() target_include_directories(${TARGET} PRIVATE ../mtmd) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) -target_link_libraries(${TARGET} PUBLIC common mtmd ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PUBLIC llama-common mtmd ${CMAKE_THREAD_LIBS_INIT}) # llama-server executable @@ -68,6 +68,6 @@ install(TARGETS ${TARGET} RUNTIME) target_include_directories(${TARGET} PRIVATE ../mtmd) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) -target_link_libraries(${TARGET} PRIVATE server-context PUBLIC common cpp-httplib ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE server-context PUBLIC llama-common cpp-httplib ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 41bdad6f878..4b899ecf007 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -4,6 +4,7 @@ #include "server-task.h" #include "server-queue.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "log.h" @@ -3010,7 +3011,7 @@ server_context_meta server_context::get_meta() const { auto eos_token_str = eos_id != LLAMA_TOKEN_NULL ? common_token_to_piece(impl->ctx, eos_id, true) : ""; return server_context_meta { - /* build_info */ build_info, + /* build_info */ std::string(llama_build_info()), /* model_name */ impl->model_name, /* model_aliases */ impl->model_aliases, /* model_tags */ impl->model_tags, diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 5667c98ef8a..a1eeec30e99 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -1,6 +1,7 @@ #include "server-common.h" #include "server-models.h" +#include "build-info.h" #include "preset.h" #include "download.h" @@ -936,7 +937,7 @@ void server_models_routes::init_routes() { {"n_ctx", 0}, }}, {"webui_settings", webui_settings}, - {"build_info", build_info}, + {"build_info", std::string(llama_build_info())}, }); return res; } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 0312f098a32..4fb953b4920 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -1,5 +1,6 @@ #include "server-task.h" +#include "build-info.h" #include "chat.h" #include "common.h" #include "json-schema-to-grammar.h" @@ -791,7 +792,7 @@ json server_task_result_cmpl_final::to_json_oaicompat() { })}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"usage", usage_json_oaicompat()}, {"id", oaicompat_cmpl_id} @@ -839,7 +840,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() { {"choices", json::array({choice})}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion"}, {"usage", usage_json_oaicompat()}, {"id", oaicompat_cmpl_id} @@ -876,7 +877,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); } @@ -892,7 +893,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); @@ -904,7 +905,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, {"usage", usage_json_oaicompat()}, }); @@ -1469,7 +1470,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat() { })}, {"created", t}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "text_completion"}, {"id", oaicompat_cmpl_id} }; @@ -1506,7 +1507,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { {"created", t}, {"id", oaicompat_cmpl_id}, {"model", oaicompat_model}, - {"system_fingerprint", build_info}, + {"system_fingerprint", std::string(llama_build_info())}, {"object", "chat.completion.chunk"}, }); }; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index fe640b978be..06318463fd4 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -5,6 +5,7 @@ #include "server-tools.h" #include "arg.h" +#include "build-info.h" #include "common.h" #include "llama.h" #include "log.h" @@ -108,7 +109,7 @@ int main(int argc, char ** argv) { llama_backend_init(); llama_numa_init(params.numa); - LOG_INF("build_info: %s\n", build_info.c_str()); + LOG_INF("build_info: %s\n", llama_build_info()); LOG_INF("%s\n", common_params_get_system_info(params).c_str()); server_http_context ctx_http; diff --git a/tools/tokenize/CMakeLists.txt b/tools/tokenize/CMakeLists.txt index feed9a10622..1e183657585 100644 --- a/tools/tokenize/CMakeLists.txt +++ b/tools/tokenize/CMakeLists.txt @@ -3,5 +3,5 @@ add_executable(${TARGET} tokenize.cpp) if(LLAMA_TOOLS_INSTALL) install(TARGETS ${TARGET} RUNTIME) endif() -target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/tools/tts/CMakeLists.txt b/tools/tts/CMakeLists.txt index 76320d4c2d6..26a8bb8f2d1 100644 --- a/tools/tts/CMakeLists.txt +++ b/tools/tts/CMakeLists.txt @@ -1,6 +1,6 @@ set(TARGET llama-tts) add_executable(${TARGET} tts.cpp) -target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE llama llama-common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/vendor/cpp-httplib/CMakeLists.txt b/vendor/cpp-httplib/CMakeLists.txt index 78dc4833226..28485a0ce80 100644 --- a/vendor/cpp-httplib/CMakeLists.txt +++ b/vendor/cpp-httplib/CMakeLists.txt @@ -5,6 +5,8 @@ find_package(Threads REQUIRED) llama_add_compile_flags() +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_library(${TARGET} STATIC httplib.cpp httplib.h) # disable warnings in 3rd party code