Renamed the namespace, added documentation about the C++ API and a small cleanup
Avijit committed Feb 21, 2025
1 parent 50228db commit dacee9f
Showing 8 changed files with 138 additions and 53 deletions.
79 changes: 79 additions & 0 deletions examples/slm_engine/README.md
@@ -150,6 +150,85 @@ The SLM server supports the following REST APIs (click to expand):

</details>

### C++ Application using the SLMEngine

The SLMEngine is designed to be embedded in other C++ applications running on the Edge. The following example illustrates how to integrate the SLMEngine into a C++ project using CMake.

First, build the SLM Engine from source using the build instructions provided in this document. The build outputs are stored in the target-specific `install/include` and `install/bin` directories.
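A successful build leaves the shared libraries used below (`libslmengine.so`, `libonnxruntime.so`, and `libonnxruntime-genai.so`) in `install/bin`, and the public headers, including `slm_engine.h`, in `install/include`.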

#### CMakeLists.txt

```cmake
cmake_minimum_required(VERSION 3.28)
project(HelloSLM)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Replace <location-of-slm-artifacts> with the install directory
# produced by the SLM Engine build.
add_library(slm_engine SHARED IMPORTED)
set_target_properties(
    slm_engine
    PROPERTIES
    IMPORTED_LOCATION <location-of-slm-artifacts>/bin/libslmengine.so
)

add_library(ort SHARED IMPORTED)
set_target_properties(
    ort
    PROPERTIES
    IMPORTED_LOCATION <location-of-slm-artifacts>/bin/libonnxruntime.so
)

add_library(ort_genai SHARED IMPORTED)
set_target_properties(
    ort_genai
    PROPERTIES
    IMPORTED_LOCATION <location-of-slm-artifacts>/bin/libonnxruntime-genai.so
)

include_directories(<location-of-slm-artifacts>/include)

add_executable(hello_slm hello_slm.cpp)
target_link_libraries(hello_slm PRIVATE slm_engine ort ort_genai)
```
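This sketch assumes a Linux target, where the shared objects are registered via `IMPORTED_LOCATION`; on Windows, `IMPORTED_IMPLIB` would instead point at the `.lib` import library that accompanies each DLL.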

#### hello_slm.cpp

```c++
#include <iostream>
#include <string>

#include "slm_engine.h"

int main(int argc, char** argv) {
  // Arguments: ONNX model directory, model family name, verbose logging
  auto slm_engine = microsoft::slm_engine::SLMEngine::CreateEngine(
      "path to ONNX Model Directory", "phi3", true);

  if (!slm_engine) {
    std::cout << "Cannot create engine!\n";
    return -1;
  }

  microsoft::slm_engine::SLMEngine::GenerationOptions generator_options;
  generator_options.MaxGeneratedTokens = 2400;
  std::string response_str;
  microsoft::slm_engine::SLMEngine::RuntimePerf kpi;

  // Call the SLM engine
  slm_engine->generate("What is 2 + 2?", generator_options, response_str, kpi);

  std::cout << "Generated Response: " << response_str << std::endl;
  return 0;
}
```
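Here `generate()` returns the completion through `response_str` and fills `kpi` (a `RuntimePerf` structure) with runtime statistics for the call; see the header for the available fields.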
See [slm_engine.h](src/cpp/slm_engine.h) for the full details of the C++ API.

For reference applications built on this library, see [slm_server.cpp](src/cpp/slm_server.cpp), an HTTP server exposing the engine, and [slm_runner.cpp](src/cpp/slm_runner.cpp), a CLI program for batch generation.

## Installation

Since the SLM Engine targets a variety of devices running on the Edge, we provide a simple-to-use build setup that developers can use to build for any system of their choosing.
4 changes: 2 additions & 2 deletions examples/slm_engine/src/cpp/input_decoder.cpp
@@ -11,7 +11,7 @@ using json = nlohmann::json;
using namespace std;

namespace microsoft {
-namespace aias {
+namespace slm_engine {

// clang-format off
// OpenAI API example
@@ -164,5 +164,5 @@ unique_ptr<InputDecoder> InputDecoder::CreateDecoder(const string& name) {
return nullptr;
}

-} // namespace aias
+} // namespace slm_engine
} // namespace microsoft
4 changes: 2 additions & 2 deletions examples/slm_engine/src/cpp/input_decoder.h
@@ -8,7 +8,7 @@
#include <vector>

namespace microsoft {
-namespace aias {
+namespace slm_engine {
/// @brief An abstract class defining the interface to various types of
/// input decoder such as OpenAI and so on.
class InputDecoder {
@@ -108,5 +108,5 @@ class InputDecoder {
virtual bool decode(const std::string& message,
InputParams& decoded_params) = 0;
};
-} // namespace aias
+} // namespace slm_engine
} // namespace microsoft
79 changes: 40 additions & 39 deletions examples/slm_engine/src/cpp/input_decoder_test.cpp
@@ -28,52 +28,53 @@ using namespace std;
/// @param output_file Path to the JSONL file to save the SLM response and stats
/// @return 0 if successful, -1 otherwise
int run_test(const string& test_data_file) {
-    // Make sure that the files exist
-    if (!filesystem::exists(test_data_file)) {
-        cout << "Error! Test Data file doesn't exist: " << test_data_file
-             << "\n";
-        return -1;
-    }
+  // Make sure that the files exist
+  if (!filesystem::exists(test_data_file)) {
+    cout << "Error! Test Data file doesn't exist: " << test_data_file
+         << "\n";
+    return -1;
+  }

-    auto open_ai_decoder =
-        microsoft::aias::InputDecoder::CreateDecoder("openai");
-    string line;
-    ifstream test_data(test_data_file);
-    while (getline(test_data, line)) {
-        if (line.empty()) {
-            continue;
-        }
-        // call the decoder
-        microsoft::aias::InputDecoder::InputParams input_params;
-        auto status = open_ai_decoder->decode(line, input_params);
-        if (status) {
-            cout << BLUE << input_params.get_messages() << CLEAR << endl;
-        } else {
-            cout << MAGENTA_BOLD << "Error in decoding\n" << CLEAR;
-        }
-    }
-    return 0;
-}
+  auto open_ai_decoder =
+      microsoft::slm_engine::InputDecoder::CreateDecoder("openai");
+  string line;
+  ifstream test_data(test_data_file);
+  while (getline(test_data, line)) {
+    if (line.empty()) {
+      continue;
+    }
+    // call the decoder
+    microsoft::slm_engine::InputDecoder::InputParams input_params;
+    auto status = open_ai_decoder->decode(line, input_params);
+    if (status) {
+      cout << BLUE << input_params.get_messages() << CLEAR << endl;
+    } else {
+      cout << MAGENTA_BOLD << "Error in decoding\n"
+           << CLEAR;
+    }
+  }
+  return 0;
+}

/// @brief Program entry point
int main(int argc, char** argv) {
-    argparse::ArgumentParser program("slm_runner", "1.0",
-                                     argparse ::default_arguments::none);
-    string test_data_file;
-    program.add_argument("-t", "--test_data_file")
-        .required()
-        .help("Path to the test data file (JSONL)")
-        .store_into(test_data_file);
+  argparse::ArgumentParser program("slm_runner", "1.0",
+                                   argparse ::default_arguments::none);
+  string test_data_file;
+  program.add_argument("-t", "--test_data_file")
+      .required()
+      .help("Path to the test data file (JSONL)")
+      .store_into(test_data_file);

-    try {
-        program.parse_args(argc, argv);
-    } catch (const std::exception& err) {
-        std::cerr << err.what() << std::endl;
-        std::cerr << program;
-        std::exit(-1);
-    }
+  try {
+    program.parse_args(argc, argv);
+  } catch (const std::exception& err) {
+    std::cerr << err.what() << std::endl;
+    std::cerr << program;
+    std::exit(-1);
+  }

-    return run_test(test_data_file);
+  return run_test(test_data_file);
}
9 changes: 7 additions & 2 deletions examples/slm_engine/src/cpp/slm_engine.cpp
@@ -30,7 +30,12 @@ using json = nlohmann::json;
#define CLEAR "\033[0m"

namespace microsoft {
-namespace aias {
+namespace slm_engine {
+
+std::string SLMEngine::GetVersion() {
+  // SW_VERSION_NUMBER is defined in the CMakeLists.txt file
+  return std::string(SW_VERSION_NUMBER);
+}

std::unique_ptr<SLMEngine> SLMEngine::CreateEngine(
const char* model_path, const std::string& model_family_name, bool verbose) {
@@ -404,5 +409,5 @@ uint32_t SLMEngine::GetMemoryUsage() {
#endif
}

-} // namespace aias
+} // namespace slm_engine
} // namespace microsoft
6 changes: 3 additions & 3 deletions examples/slm_engine/src/cpp/slm_engine.h
@@ -32,7 +32,7 @@
#endif

namespace microsoft {
-namespace aias {
+namespace slm_engine {

/// @brief SLM Engine class to interact with the GenAI Model
///
@@ -204,7 +204,7 @@ class SLM_ENGINE_EXPORT SLMEngine {

SLMEngine(const SLMEngine&) = delete;
SLMEngine& operator=(const SLMEngine&) = delete;
-  static std::string GetVersion() { return std::string(SW_VERSION_NUMBER); }
+  static std::string GetVersion();

private:
SLMEngine(bool verbose) : m_verbose(verbose) {}
@@ -250,5 +250,5 @@ class SLM_ENGINE_EXPORT SLMEngine {
// Need a scoped mutex to ensure only one complete() call at a time
std::mutex m_mutex;
};
-} // namespace aias
+} // namespace slm_engine
} // namespace microsoft
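
With `GetVersion()` defined out-of-line, the `SW_VERSION_NUMBER` macro only needs to exist while compiling the engine itself, so consumers of `slm_engine.h` no longer have to define it. As a hypothetical sketch of how such a macro can be injected from the build (the project's actual CMakeLists.txt may do this differently):

```cmake
# Hypothetical sketch: inject the project version as the SW_VERSION_NUMBER macro.
# Assumes the slm_engine target and PROJECT_VERSION exist; the real build may differ.
target_compile_definitions(
    slm_engine
    PRIVATE
    SW_VERSION_NUMBER="${PROJECT_VERSION}"
)
```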
4 changes: 2 additions & 2 deletions examples/slm_engine/src/cpp/slm_runner.cpp
@@ -51,7 +51,7 @@ int run_test(const string& model_path, const string& model_family,
<< "Test File: " << test_data_file << "\n";

// Create the SLM
-  auto slm_engine = microsoft::aias::SLMEngine::CreateEngine(
+  auto slm_engine = microsoft::slm_engine::SLMEngine::CreateEngine(
model_path.c_str(), model_family, verbose);
if (!slm_engine) {
cout << "Cannot create engine!\n";
@@ -144,7 +144,7 @@ int main(int argc, char** argv) {
"If provided, more debugging information printed on standard "
"output");

cout << "SLM Runner Version: " << microsoft::aias::SLMEngine::GetVersion()
cout << "SLM Runner Version: " << microsoft::slm_engine::SLMEngine::GetVersion()
<< endl;
try {
program.parse_args(argc, argv);
6 changes: 3 additions & 3 deletions examples/slm_engine/src/cpp/slm_server.cpp
@@ -31,7 +31,7 @@ using namespace std;
int run_server(const string& model_path, const string& model_family,
int port_number, bool verbose) {
// Create the SLM
-  auto slm_engine = microsoft::aias::SLMEngine::CreateEngine(
+  auto slm_engine = microsoft::slm_engine::SLMEngine::CreateEngine(
model_path.c_str(), model_family, verbose);
if (!slm_engine) {
cout << "Cannot create engine!\n";
@@ -46,7 +46,7 @@ int run_server(const string& model_path, const string& model_family,

json engine_state = {
{"model", std::filesystem::path(model_path).filename().string()},
{"engine_version", microsoft::aias::SLMEngine::GetVersion()}};
{"engine_version", microsoft::slm_engine::SLMEngine::GetVersion()}};
response_body["engine_state"] = engine_state;
json get_response;
get_response["response"] = response_body;
@@ -116,7 +116,7 @@ int main(int argc, char** argv) {
"If provided, more debugging information printed on standard "
"output");

cout << "SLM Runner Version: " << microsoft::aias::SLMEngine::GetVersion()
cout << "SLM Runner Version: " << microsoft::slm_engine::SLMEngine::GetVersion()
<< endl;
try {
program.parse_args(argc, argv);
