diff --git a/content/learning-paths/embedded-and-microcontrollers/rpi-llama3/llama3.md b/content/learning-paths/embedded-and-microcontrollers/rpi-llama3/llama3.md
index 5b576665d..714ec069d 100755
--- a/content/learning-paths/embedded-and-microcontrollers/rpi-llama3/llama3.md
+++ b/content/learning-paths/embedded-and-microcontrollers/rpi-llama3/llama3.md
@@ -90,7 +90,9 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -Bcmake-out .
 cmake --build cmake-out -j16 --target install --config Release
 ```
@@ -101,10 +103,7 @@ Next, compile and build `llama_runner` and `llama_main`:
 cmake -DPYTHON_EXECUTABLE=python \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -Bcmake-out/examples/models/llama \
     examples/models/llama
 cmake --build cmake-out/examples/models/llama -j16 --config Release
diff --git a/content/learning-paths/mobile-graphics-and-gaming/build-llama3-chat-android-app-using-executorch-and-xnnpack/5-run-benchmark-on-android.md b/content/learning-paths/mobile-graphics-and-gaming/build-llama3-chat-android-app-using-executorch-and-xnnpack/5-run-benchmark-on-android.md
index 158c47cbc..a9422241f 100644
--- a/content/learning-paths/mobile-graphics-and-gaming/build-llama3-chat-android-app-using-executorch-and-xnnpack/5-run-benchmark-on-android.md
+++ b/content/learning-paths/mobile-graphics-and-gaming/build-llama3-chat-android-app-using-executorch-and-xnnpack/5-run-benchmark-on-android.md
@@ -22,9 +22,9 @@ export ANDROID_NDK=$ANDROID_HOME/ndk/28.0.12433566/
 Make sure you can confirm $ANDROID_NDK/build/cmake/android.toolchain.cmake is available for CMake to cross-compile.
 {{% /notice %}}
 
-### 2. Build ExecuTorch and associated libraries for Android with KleidiAI 
+### 2. Build ExecuTorch and associated libraries for Android with KleidiAI
 
-You are now ready to build ExecuTorch for Android by taking advantage of the performance optimization provided by the [KleidiAI](https://gitlab.arm.com/kleidi/kleidiai) kernels. 
+You are now ready to build ExecuTorch for Android by taking advantage of the performance optimization provided by the [KleidiAI](https://gitlab.arm.com/kleidi/kleidiai) kernels.
 
 Use `cmake` to cross-compile ExecuTorch:
 
@@ -119,7 +119,7 @@ adb push cmake-out-android/examples/models/llama/llama_main /data/local/tmp/llama
 Use the Llama runner to execute the model on the phone with the `adb` command:
 
 ``` bash
-adb shell "cd /data/local/tmp/llama && ./llama_main --model_path llama3_1B_kv_sdpa_xnn_qe_4_64_1024_embedding_4bit.pte --tokenizer_path tokenizer.model --prompt "<|start_header_id|>system<|end_header_id|>\nYour name is Cookie. you are helpful, polite, precise, concise, honest, good at writing. You always give precise and brief answers up to 32 words<|eot_id|><|start_header_id|>user<|end_header_id|>\nHey Cookie! how are you today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>" --warmup=1 --cpu_threads=5"
+adb shell "cd /data/local/tmp/llama && ./llama_main --model_path llama3_1B_kv_sdpa_xnn_qe_4_64_1024_embedding_4bit.pte --tokenizer_path tokenizer.model --prompt '<|start_header_id|>system<|end_header_id|>\nYour name is Cookie. you are helpful, polite, precise, concise, honest, good at writing. You always give precise and brief answers up to 32 words<|eot_id|><|start_header_id|>user<|end_header_id|>\nHey Cookie! how are you today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>' --warmup=1 --cpu_threads=5"
 ```
 
 The output should look something like this.