Remove onnxruntime-genai-static library (non trivial change) #1264

Open · wants to merge 17 commits into base: main
26 changes: 5 additions & 21 deletions CMakeLists.txt
@@ -87,24 +87,19 @@ if(WIN32)
target_compile_definitions(onnxruntime-genai PRIVATE VERSION_PATCH=${VERSION_PATCH})
target_compile_definitions(onnxruntime-genai PRIVATE VERSION_SUFFIX=${VERSION_SUFFIX})
target_compile_definitions(onnxruntime-genai PRIVATE FILE_NAME=\"onnxruntime-genai.dll\")

add_library(onnxruntime-genai-static STATIC ${generator_srcs})
else()
add_library(onnxruntime-genai SHARED ${generator_srcs})
add_library(onnxruntime-genai-static STATIC ${generator_srcs})
endif()

target_include_directories(onnxruntime-genai PRIVATE ${ORT_HEADER_DIR})
target_include_directories(onnxruntime-genai-static PRIVATE ${ORT_HEADER_DIR})
target_include_directories(onnxruntime-genai PRIVATE ${onnxruntime_extensions_SOURCE_DIR}/include)
target_include_directories(onnxruntime-genai PRIVATE ${onnxruntime_extensions_SOURCE_DIR}/shared/api/)
target_include_directories(onnxruntime-genai-static PRIVATE ${onnxruntime_extensions_SOURCE_DIR}/include)
target_include_directories(onnxruntime-genai-static PUBLIC ${onnxruntime_extensions_SOURCE_DIR}/shared/api/)
target_link_libraries(onnxruntime-genai PRIVATE onnxruntime_extensions)
target_link_libraries(onnxruntime-genai-static PUBLIC onnxruntime_extensions)
target_link_directories(onnxruntime-genai PRIVATE ${ORT_LIB_DIR})
target_link_libraries(onnxruntime-genai PRIVATE Threads::Threads)
target_link_libraries(onnxruntime-genai-static PUBLIC Threads::Threads)

# The genai library itself is always embedded in the shared library
list(APPEND ortgenai_embed_libs "$<TARGET_FILE:onnxruntime-genai>")

# we keep the shared libraries disconnected on Android as they will come from separate AARs and we don't want to force
# the ORT version to match in both.
@@ -146,30 +141,20 @@ if(CMAKE_GENERATOR_TOOLSET MATCHES "Visual Studio")
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
set_target_properties(onnxruntime-genai-static PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(onnxruntime-genai-static PRIVATE dl) # For dlopen & co
set_target_properties(onnxruntime-genai PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(onnxruntime-genai PRIVATE dl) # For dlopen & co
endif()


if(USE_DML)
list(APPEND ortgenai_embed_libs "${D3D12_LIB_DIR}/D3D12Core.dll")
target_include_directories(onnxruntime-genai PRIVATE $<TARGET_PROPERTY:${WIL_TARGET},INTERFACE_INCLUDE_DIRECTORIES>)
target_include_directories(onnxruntime-genai PRIVATE $<TARGET_PROPERTY:${DIRECTX_HEADERS_TARGET},INTERFACE_INCLUDE_DIRECTORIES>/directx)
target_include_directories(onnxruntime-genai PRIVATE $<TARGET_PROPERTY:${DIRECTX_HEADERS_TARGET},INTERFACE_INCLUDE_DIRECTORIES>)
target_include_directories(onnxruntime-genai-static PUBLIC $<TARGET_PROPERTY:${WIL_TARGET},INTERFACE_INCLUDE_DIRECTORIES>)
target_include_directories(onnxruntime-genai-static PUBLIC $<TARGET_PROPERTY:${DIRECTX_HEADERS_TARGET},INTERFACE_INCLUDE_DIRECTORIES>/directx)
target_include_directories(onnxruntime-genai-static PUBLIC $<TARGET_PROPERTY:${DIRECTX_HEADERS_TARGET},INTERFACE_INCLUDE_DIRECTORIES>)
target_include_directories(onnxruntime-genai PRIVATE ${DML_HEADER_DIR})
target_include_directories(onnxruntime-genai-static PRIVATE ${DML_HEADER_DIR})
target_include_directories(onnxruntime-genai PRIVATE ${D3D12_HEADER_DIR})
target_include_directories(onnxruntime-genai-static PRIVATE ${D3D12_HEADER_DIR})
target_link_directories(onnxruntime-genai PRIVATE ${DML_LIB_DIR})
target_link_directories(onnxruntime-genai PRIVATE ${D3D12_LIB_DIR})
target_link_directories(onnxruntime-genai-static PUBLIC ${DML_LIB_DIR})
target_link_directories(onnxruntime-genai-static PUBLIC ${D3D12_LIB_DIR})
target_link_libraries(onnxruntime-genai PRIVATE d3d12.lib dxcore.lib dxguid.lib dxgi.lib)
target_link_libraries(onnxruntime-genai PRIVATE d3d12.lib dxcore.lib dxguid.lib dxgi.lib)
target_link_libraries(onnxruntime-genai-static PUBLIC d3d12.lib dxcore.lib dxguid.lib dxgi.lib)

get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/_deps ABSOLUTE)
set(DXC_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.Direct3D.DXC.1.7.2308.12)
@@ -194,7 +179,6 @@ if(USE_DML)

add_dependencies(RESTORE_PACKAGES nuget)
add_dependencies(onnxruntime-genai RESTORE_PACKAGES)
add_dependencies(onnxruntime-genai-static RESTORE_PACKAGES)
endif()

if(ANDROID)
2 changes: 1 addition & 1 deletion benchmark/c/CMakeLists.txt
@@ -26,7 +26,7 @@ target_include_directories(model_benchmark PRIVATE
${CMAKE_SOURCE_DIR}/src # directory containing the ort_genai headers
)

target_link_libraries(model_benchmark PRIVATE onnxruntime-genai-static ${ONNXRUNTIME_LIB})
target_link_libraries(model_benchmark PRIVATE onnxruntime-genai ${ONNXRUNTIME_LIB})

target_link_directories(model_benchmark PRIVATE ${ORT_LIB_DIR})

18 changes: 16 additions & 2 deletions src/config.cpp
@@ -610,11 +610,19 @@ struct Search_Element : JSON::Element {
};

void SetSearchNumber(Config::Search& search, std::string_view name, double value) {
Search_Element(search).OnValue(name, value);
try {
Search_Element(search).OnValue(name, value);
} catch (...) {
JSON::TranslateException(name);
}
}

void SetSearchBool(Config::Search& search, std::string_view name, bool value) {
Search_Element(search).OnValue(name, value);
try {
Search_Element(search).OnValue(name, value);
} catch (...) {
JSON::TranslateException(name);
}
}

void ClearProviders(Config& config) {
@@ -712,6 +720,12 @@ void ParseConfig(const fs::path& filename, std::string_view json_overlay, Config
}
}

void OverlayConfig(Config& config, std::string_view json) {
Root_Element root{config};
RootObject_Element element{root};
JSON::Parse(element, json);
}

Config::Config(const fs::path& path, std::string_view json_overlay) : config_path{path} {
ParseConfig(path / "genai_config.json", json_overlay, *this);

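Note for reviewers: a minimal sketch of how the translated search-option errors now surface to callers. It assumes the Generators headers from this repo are on the include path; the option name below is deliberately invalid and purely illustrative.

#include <iostream>
#include <stdexcept>
#include "config.h"  // declares Generators::SetSearchNumber (see src/config.h above)

// Sketch: an unknown option no longer escapes as a bare unknown_value_error;
// JSON::TranslateException converts it into a std::runtime_error carrying the name.
void TrySetOption(Generators::Config::Search& search) {
  try {
    Generators::SetSearchNumber(search, "not_a_real_option", 1.0);  // hypothetical name
  } catch (const std::runtime_error& e) {
    std::cerr << e.what() << '\n';  // e.g. ' Unknown value "not_a_real_option"'
  }
}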
1 change: 1 addition & 0 deletions src/config.h
@@ -188,6 +188,7 @@ void SetSearchNumber(Config::Search& search, std::string_view name, double value
void SetSearchBool(Config::Search& search, std::string_view name, bool value);
void ClearProviders(Config& config);
void SetProviderOption(Config& config, std::string_view provider_name, std::string_view option_name, std::string_view option_value);
void OverlayConfig(Config& config, std::string_view json);
bool IsCudaGraphEnabled(Config::SessionOptions& session_options);

} // namespace Generators
78 changes: 57 additions & 21 deletions src/generators.cpp
@@ -128,37 +128,73 @@ struct GenaiInterfaceImpl : GenaiInterface {
void Sequences_RewindTo(Sequences* p_this, size_t new_length) override { return p_this->RewindTo(new_length); }
} g_genai;

DeviceInterface* GetCudaInterface() {
// Load the shared library onnxruntime-genai-cuda.dll
// This is a workaround to avoid linking the CUDA library to the generator library
// The CUDA library is only needed for the CUDA allocator
#if defined(_WIN32)
static std::unique_ptr<void, void (*)(void*)> cuda_library{LoadLibrary((CurrentModulePath() + "onnxruntime-genai-cuda.dll").c_str()),
[](void* h) { FreeLibrary(reinterpret_cast<HMODULE>(h)); }};
struct LibraryHandle {
LibraryHandle(const char* filename) {
auto path = CurrentModulePath() + filename;
handle_ = LoadLibrary(path.c_str());
if (!handle_)
throw std::runtime_error(std::string("Failed to load library: ") + path + " Error: " + std::to_string(GetLastError()));
};

~LibraryHandle() { FreeLibrary(handle_); }

FARPROC __stdcall GetSymbol(const char* name) { return ::GetProcAddress(handle_, name); }

operator HANDLE() { return handle_; }

private:
HMODULE handle_{};
};
#elif defined(__linux__) && !defined(__ANDROID__)
static std::unique_ptr<void, void (*)(void*)> cuda_library{dlopen((Ort::GetCurrentModuleDir() + "/libonnxruntime-genai-cuda.so").c_str(), RTLD_NOW | RTLD_DEEPBIND),
[](void* h) { dlclose(h); }};
struct LibraryHandle {
LibraryHandle(const char* filename) {
auto path = Ort::GetCurrentModuleDir() + "/" + filename;
handle_ = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
if (!handle_)
throw std::runtime_error(std::string("Failed to load library: ") + dlerror()); // dlerror() includes the path
}
~LibraryHandle() {
dlclose(handle_);
}

void* GetSymbol(const char* name) { return ::dlsym(handle_, name); }

operator void*() { return handle_; }

private:
void* handle_{};
};
#else
static std::unique_ptr<void, void (*)(void*)> cuda_library{nullptr, [](void* h) {}};
#endif
struct LibraryHandle {
LibraryHandle(const char* filename) {}
~LibraryHandle() {}

if (!cuda_library) {
throw std::runtime_error("Cuda interface not available.");
}
void* GetSymbol(const char* name) { return nullptr; }

operator bool() { return false; }
};
#endif

Generators::DeviceInterface* GetInterface(GenaiInterface * p_genai);
static DeviceInterface* cuda_interface{[] {
DeviceInterface* GetCudaInterface() {
try {
#if defined(_WIN32)
auto get_cuda_fn = reinterpret_cast<decltype(&GetInterface)>(GetProcAddress(reinterpret_cast<HMODULE>(cuda_library.get()), "GetInterface"));
static LibraryHandle library{"onnxruntime-genai-cuda.dll"};
#elif defined(__linux__) && !defined(__ANDROID__)
auto get_cuda_fn = reinterpret_cast<decltype(&GetInterface)>(dlsym(cuda_library.get(), "GetInterface"));
static LibraryHandle library{"libonnxruntime-genai-cuda.so"};
#else
auto get_cuda_fn = [](GenaiInterface*) { return nullptr; };
static LibraryHandle library{""};
#endif
return get_cuda_fn(&g_genai);
}()};
if (!library)
throw std::runtime_error("Shared library load failure (see first error)");

return cuda_interface;
Generators::DeviceInterface* GetInterface(GenaiInterface * p_genai);
static DeviceInterface* cuda_interface = reinterpret_cast<decltype(&GetInterface)>(library.GetSymbol("GetInterface"))(&g_genai);

return cuda_interface;
} catch (const std::exception& e) {
throw std::runtime_error("Cuda interface not available: " + std::string(e.what()));
}
}

std::string to_string(DeviceType device_type) {
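For context: the new code replaces the static unique_ptr plus manual LoadLibrary/dlopen dance with a small RAII wrapper. Reduced to a standalone POSIX-only sketch (class and variable names here are illustrative, not part of this PR):

#include <dlfcn.h>
#include <stdexcept>
#include <string>

// Minimal RAII wrapper mirroring the LibraryHandle pattern above:
// load on construction, throw with dlerror() detail on failure, unload on destruction.
class ScopedLibrary {
 public:
  explicit ScopedLibrary(const std::string& path)
      : handle_{dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL)} {
    if (!handle_)
      throw std::runtime_error(std::string("Failed to load library: ") + dlerror());
  }
  ~ScopedLibrary() { dlclose(handle_); }
  ScopedLibrary(const ScopedLibrary&) = delete;
  ScopedLibrary& operator=(const ScopedLibrary&) = delete;

  void* GetSymbol(const char* name) const { return dlsym(handle_, name); }

 private:
  void* handle_{};
};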
18 changes: 14 additions & 4 deletions src/json.cpp
@@ -35,6 +35,18 @@ void Parse(Element& element, std::string_view document) {
JSON{element, document};
}

void TranslateException(std::string_view name) {
try {
throw;
} catch (const unknown_value_error&) {
throw std::runtime_error(" Unknown value \"" + std::string(name) + "\"");
} catch (const type_mismatch& e) {
throw std::runtime_error(std::string(name) + " - Expected a " + std::string(value_names[e.expected]) + " but saw a " + std::string(value_names[e.seen]));
} catch (...) {
throw;
}
}

JSON::JSON(Element& element, std::string_view document) : begin_{document.data()}, end_{document.data() + document.size()} {
try {
Parse_Value(element, {});
@@ -167,14 +179,12 @@ void JSON::Parse_Value(Element& element, std::string_view name) {
throw unknown_value_error{};
break;
}
} catch (const unknown_value_error&) {
throw std::runtime_error(" Unknown value \"" + std::string(name) + "\"");
} catch (const type_mismatch& e) {
throw std::runtime_error(std::string(name) + " - Expected a " + std::string(value_names[e.expected]) + " but saw a " + std::string(value_names[e.seen]));
} catch (const std::runtime_error& e) {
if (!name.empty())
throw std::runtime_error(std::string(name) + ":" + e.what());
throw;
} catch (...) {
TranslateException(name);
}

Parse_Whitespace();
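TranslateException depends on being called from inside a catch handler: the bare `throw;` re-raises the in-flight exception so it can be matched against the JSON error types. A self-contained illustration of that idiom (the exception type and option name are placeholders, not the ones in json.cpp):

#include <iostream>
#include <stdexcept>
#include <string>

struct low_level_error {};  // stand-in for unknown_value_error / type_mismatch

// Must only be called from within a catch handler: rethrows the active
// exception and converts known low-level types into a readable runtime_error.
void Translate(const std::string& name) {
  try {
    throw;  // re-raise the exception currently being handled
  } catch (const low_level_error&) {
    throw std::runtime_error("Unknown value \"" + name + "\"");
  }
  // any other exception type propagates unchanged
}

int main() {
  try {
    try {
      throw low_level_error{};
    } catch (...) {
      Translate("top_k");  // placeholder option name
    }
  } catch (const std::runtime_error& e) {
    std::cout << e.what() << '\n';  // prints: Unknown value "top_k"
  }
}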
1 change: 1 addition & 0 deletions src/json.h
@@ -35,4 +35,5 @@ struct Element {
};

void Parse(Element& element, std::string_view document);
void TranslateException(std::string_view name); // Translate JSON exceptions into std::runtime_error with a useful message
} // namespace JSON
5 changes: 4 additions & 1 deletion src/models/audio_processor.cpp
@@ -38,7 +38,10 @@ std::unique_ptr<OrtValue> ProcessMel(ort_extensions::OrtxObjectPtr<OrtxTensor>&

} // namespace

std::unique_ptr<Audios> LoadAudios(const std::span<const char* const>& audio_paths) {
std::unique_ptr<Audios> LoadAudios(std::span<const char* const> audio_paths) {
if (audio_paths.empty())
throw std::runtime_error("No audios provided");

for (const char* audio_path : audio_paths) {
if (!fs::path(audio_path).exists()) {
throw std::runtime_error("Audio path does not exist: " + std::string(audio_path));
2 changes: 1 addition & 1 deletion src/models/audio_processor.h
@@ -19,7 +19,7 @@ struct Audios {
size_t num_audios_{};
};

std::unique_ptr<Audios> LoadAudios(const std::span<const char* const>& audio_paths);
std::unique_ptr<Audios> LoadAudios(std::span<const char* const> audio_paths);

struct AudioProcessor {
AudioProcessor(Config& config, const SessionInfo& session_info);
18 changes: 18 additions & 0 deletions src/models/model.cpp
@@ -208,6 +208,24 @@ std::vector<int32_t> Tokenizer::EncodeBatch(std::span<const std::string> strings
return PadInputs(span_sequences, pad_token_id_);
}

std::shared_ptr<Tensor> Tokenizer::EncodeBatch(std::span<const char*> strings) const {
std::vector<std::vector<int32_t>> sequences;
std::vector<std::span<const int32_t>> span_sequences;
for (size_t i = 0; i < strings.size(); i++) {
sequences.emplace_back(Encode(strings[i]));
span_sequences.emplace_back(sequences.back());
}

auto encoded = PadInputs(span_sequences, pad_token_id_); // TODO: Pad directly into tensor vs copying?

auto tensor = std::make_shared<Tensor>();
auto shape = std::array<int64_t, 2>{static_cast<int64_t>(strings.size()), static_cast<int64_t>(encoded.size() / strings.size())};
tensor->ort_tensor_ = OrtValue::CreateTensor<int32_t>(Ort::Allocator::GetWithDefaultOptions(), shape);
std::copy(encoded.begin(), encoded.end(), tensor->ort_tensor_->GetTensorMutableData<int32_t>());

return tensor;
}

std::vector<std::string> Tokenizer::DecodeBatch(std::span<const int32_t> sequences, size_t count) const {
if (sequences.size() % count != 0)
throw std::runtime_error("DecodeBatch: sequences must be evenly divisible by the count");
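A usage sketch for the new EncodeBatch overload, assuming a Tokenizer already obtained from a loaded model; the prompt strings, include path, and helper name are illustrative.

#include <memory>
#include <span>
#include "models/model.h"  // Generators::Tokenizer / Generators::Tensor (src/models layout)

// Sketch: batch-encode raw C strings into a single padded [batch, max_len] int32 tensor.
std::shared_ptr<Generators::Tensor> EncodePrompts(const Generators::Tokenizer& tokenizer) {
  const char* prompts[] = {"Hello", "How are you today?"};
  return tokenizer.EncodeBatch(std::span<const char*>(prompts, 2));
}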
1 change: 1 addition & 0 deletions src/models/model.h
@@ -81,6 +81,7 @@ struct Tokenizer : std::enable_shared_from_this<Tokenizer>, LeakChecked<Tokenize
std::string Decode(std::span<const int32_t> tokens) const;

std::vector<int32_t> EncodeBatch(std::span<const std::string> strings) const;
std::shared_ptr<Tensor> EncodeBatch(std::span<const char*> strings) const;
std::vector<std::string> DecodeBatch(std::span<const int32_t> sequences, size_t count) const;

int32_t TokenToTokenId(const char* token) const;
33 changes: 33 additions & 0 deletions src/models/onnxruntime_inline.h
@@ -92,6 +92,39 @@ inline constexpr ONNXTensorElementDataType TypeToTensorType<Float16_t> = ONNX_TE
template <>
inline constexpr ONNXTensorElementDataType TypeToTensorType<BFloat16_t> = ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16;

inline size_t SizeOf(ONNXTensorElementDataType type) {
switch (type) {
case Ort::TypeToTensorType<uint8_t>:
return sizeof(uint8_t);
case Ort::TypeToTensorType<int8_t>:
return sizeof(int8_t);
case Ort::TypeToTensorType<uint16_t>:
return sizeof(uint16_t);
case Ort::TypeToTensorType<int16_t>:
return sizeof(int16_t);
case Ort::TypeToTensorType<uint32_t>:
return sizeof(uint32_t);
case Ort::TypeToTensorType<int32_t>:
return sizeof(int32_t);
case Ort::TypeToTensorType<uint64_t>:
return sizeof(uint64_t);
case Ort::TypeToTensorType<int64_t>:
return sizeof(int64_t);
case Ort::TypeToTensorType<bool>:
return sizeof(bool);
case Ort::TypeToTensorType<float>:
return sizeof(float);
case Ort::TypeToTensorType<double>:
return sizeof(double);
case Ort::TypeToTensorType<Ort::Float16_t>:
return sizeof(Ort::Float16_t);
case Ort::TypeToTensorType<Ort::BFloat16_t>:
return sizeof(Ort::BFloat16_t);
default:
throw std::runtime_error("Unsupported ONNXTensorElementDataType in SizeOf");
}
}

inline std::vector<std::string> GetAvailableProviders() {
int len;
char** providers;
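Ort::SizeOf is what the StaticBuffer and Whisper_State changes below use to turn an element type plus shape into a byte count. A short sketch of that computation (the standalone helper name is illustrative):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>
// ONNXTensorElementDataType comes from the ONNX Runtime C API headers;
// Ort::SizeOf is the addition in onnxruntime_inline.h above.

// Sketch: bytes needed for a dense tensor of the given element type and shape.
size_t TensorBytes(ONNXTensorElementDataType type, const std::vector<int64_t>& shape) {
  const int64_t elements =
      std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
  return Ort::SizeOf(type) * static_cast<size_t>(elements);
}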
5 changes: 4 additions & 1 deletion src/models/prompt_image_processor.cpp
@@ -104,7 +104,10 @@ std::unique_ptr<OrtValue> ProcessImageSizes(ortc::Tensor<int64_t>* image_sizes,

} // namespace

std::unique_ptr<Images> LoadImages(const std::span<const char* const>& image_paths) {
std::unique_ptr<Images> LoadImages(std::span<const char* const> image_paths) {
if (image_paths.empty())
throw std::runtime_error("No images provided");

for (const char* image_path : image_paths) {
if (!fs::path(image_path).exists()) {
throw std::runtime_error("Image path does not exist: " + std::string(image_path));
2 changes: 1 addition & 1 deletion src/models/prompt_image_processor.h
@@ -15,7 +15,7 @@ struct Images {
size_t num_images_{};
};

std::unique_ptr<Images> LoadImages(const std::span<const char* const>& image_paths);
std::unique_ptr<Images> LoadImages(std::span<const char* const> image_paths);

struct ImageProcessor {
ImageProcessor(Config& config, const SessionInfo& session_info);
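With LoadImages/LoadAudios now taking the span by value and rejecting an empty span, a call site looks roughly like this (file names and helper name are illustrative):

#include <memory>
#include <span>
#include "models/prompt_image_processor.h"  // Generators::LoadImages (src/models layout)

// Sketch: build a span over C-string paths; an empty span now throws
// "No images provided" instead of being forwarded to the extensions image loader.
std::unique_ptr<Generators::Images> LoadTwoImages() {
  const char* paths[] = {"cat.png", "dog.png"};
  return Generators::LoadImages(std::span<const char* const>(paths, 2));
}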
2 changes: 1 addition & 1 deletion src/models/static_buffer.cpp
@@ -11,7 +11,7 @@ StaticBuffer::StaticBuffer(Ort::Allocator* allocator, size_t max_beam_batch_size

std::unique_ptr<OrtValue> StaticBuffer::CreateTensorOnStaticBuffer(std::span<const int64_t> shape,
ONNXTensorElementDataType type) {
size_t new_bytes = SizeOf(type) * GetNumElements(shape);
size_t new_bytes = Ort::SizeOf(type) * GetNumElements(shape);
if (buffer_ == nullptr) {
// Assuming the first dimension is the batch size
bytes_ = new_bytes * (max_beam_batch_size_ / shape[0]);
2 changes: 1 addition & 1 deletion src/models/whisper.cpp
@@ -141,7 +141,7 @@ DeviceSpan<float> Whisper_State::Run(int current_length, DeviceSpan<int32_t>& ne
case RunState::Decoder_First: {
auto src_shape_info = init_presents_[0]->GetTensorTypeAndShapeInfo();

const auto copy_data_size_all = src_shape_info->GetElementCount() * SizeOf(src_shape_info->GetElementType());
const auto copy_data_size_all = src_shape_info->GetElementCount() * Ort::SizeOf(src_shape_info->GetElementType());

#if 0 // USE_CUDA
const auto src_dims = src_shape_info->GetShape();