Skip to content

Commit 8d27733

Browse files
PR into #488 (#675)
* Bump flyteidl2 version to latest and get rid of the manually copied code * Fix the pem issue where we had to manually download the cert from amazon before creating the channel (with tonic connections you have to call `with_native_roots()` to pick up root certs). * Fix maturin setting so it'll build. * Pull in some changes that have been made to the main controller. (#607, #521, #621) * Add in some CI as a separate PR - #676. But we need to play with this experience a bit more first. Need to go through the whole experience and see how things feel. * More changes described in #675 (comment) --------- Signed-off-by: Yee Hing Tong <wild-endeavor@users.noreply.github.com> Signed-off-by: machichima <nary12321@gmail.com> Co-authored-by: machichima <nary12321@gmail.com>
1 parent ee6666f commit 8d27733

51 files changed

Lines changed: 2800 additions & 1423 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/lint.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,51 @@ jobs:
2727
run: |
2828
make fmt
2929
git diff --exit-code
30+
check-flyteidl2-versions:
31+
name: check flyteidl2 versions
32+
runs-on: ubuntu-latest
33+
steps:
34+
- name: Fetch the code
35+
uses: actions/checkout@v4
36+
- name: Check flyteidl2 version consistency
37+
run: |
38+
# Extract flyteidl2 version from root pyproject.toml
39+
ROOT_VER=$(grep 'flyteidl2==' pyproject.toml | head -1 | sed 's/.*flyteidl2==\([^"]*\).*/\1/')
40+
echo "Root pyproject.toml: flyteidl2==$ROOT_VER"
41+
42+
# Extract flyteidl2 version from rs_controller/Cargo.toml
43+
CARGO_VER=$(grep 'flyteidl2' rs_controller/Cargo.toml | grep -v '^#' | sed 's/.*"=\(.*\)".*/\1/')
44+
echo "rs_controller/Cargo.toml: flyteidl2=$CARGO_VER"
45+
46+
# Extract flyteidl2 version from rs_controller/pyproject.toml
47+
RS_VER=$(grep 'flyteidl2==' rs_controller/pyproject.toml | head -1 | sed 's/.*flyteidl2==\([^"]*\).*/\1/')
48+
echo "rs_controller/pyproject.toml: flyteidl2==$RS_VER"
49+
50+
# Compare all three
51+
if [ "$ROOT_VER" != "$CARGO_VER" ] || [ "$ROOT_VER" != "$RS_VER" ]; then
52+
echo "ERROR: flyteidl2 versions do not match!"
53+
echo " pyproject.toml: $ROOT_VER"
54+
echo " rs_controller/Cargo.toml: $CARGO_VER"
55+
echo " rs_controller/pyproject.toml: $RS_VER"
56+
exit 1
57+
fi
58+
echo "All flyteidl2 versions match: $ROOT_VER"
3059
rs-fmt:
3160
name: rust fmt
3261
runs-on: ubuntu-latest
3362
steps:
3463
- name: Fetch the code
3564
uses: actions/checkout@v4
65+
- name: Cache Cargo registry and build
66+
uses: actions/cache@v4
67+
with:
68+
path: |
69+
~/.cargo/registry
70+
~/.cargo/git
71+
rs_controller/target
72+
key: ${{ runner.os }}-cargo-fmt-${{ hashFiles('rs_controller/Cargo.lock') }}
73+
restore-keys: |
74+
${{ runner.os }}-cargo-fmt-
3675
- name: Install nightly toolchain
3776
run: |
3877
rustup toolchain install nightly
@@ -46,6 +85,16 @@ jobs:
4685
steps:
4786
- name: Fetch the code
4887
uses: actions/checkout@v4
88+
- name: Cache Cargo registry and build
89+
uses: actions/cache@v4
90+
with:
91+
path: |
92+
~/.cargo/registry
93+
~/.cargo/git
94+
rs_controller/target
95+
key: ${{ runner.os }}-cargo-lint-${{ hashFiles('rs_controller/Cargo.lock') }}
96+
restore-keys: |
97+
${{ runner.os }}-cargo-lint-
4998
- name: Install toolchain
5099
run: |
51100
rustup toolchain install

.github/workflows/publish.yml

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,95 @@ name: Publish
33
on:
44
release:
55
types: [published]
6+
push:
7+
branches: [revisit-protos-ci]
68
pull_request:
79
paths:
810
- ".github/workflows/publish.yml"
911
- "maint_tools/build_default_image.py"
1012

1113
jobs:
14+
rs-controller-wheels:
15+
name: Build RS controller wheel (${{ matrix.target }})
16+
runs-on: ${{ matrix.os }}
17+
strategy:
18+
matrix:
19+
include:
20+
- target: x86_64
21+
os: ubuntu-latest
22+
- target: aarch64
23+
os: ubuntu-24.04-arm
24+
- target: aarch64-apple-darwin
25+
os: macos-latest
26+
steps:
27+
- uses: actions/checkout@v4
28+
with:
29+
fetch-depth: "0"
30+
- name: Set version from tag
31+
run: |
32+
if [[ "$GITHUB_REF" == refs/tags/v* ]]; then
33+
VERSION=$(echo "$GITHUB_REF" | sed 's|refs/tags/v||')
34+
else
35+
VERSION="0.0.0.dev0"
36+
fi
37+
echo "VERSION=$VERSION" >> $GITHUB_ENV
38+
sed "s/^version = .*/version = \"$VERSION\"/" rs_controller/pyproject.toml > tmp && mv tmp rs_controller/pyproject.toml
39+
echo "Set version to $VERSION"
40+
cat rs_controller/pyproject.toml
41+
- name: Build wheels
42+
uses: PyO3/maturin-action@v1
43+
with:
44+
target: ${{ matrix.target }}
45+
manylinux: auto
46+
args: --release --out dist -m rs_controller/Cargo.toml
47+
sccache: true
48+
- name: Upload wheels
49+
uses: actions/upload-artifact@v4
50+
with:
51+
name: rs-controller-wheel-${{ matrix.target }}
52+
path: dist/*.whl
53+
54+
rs-controller-publish:
55+
name: Publish RS controller to PyPI
56+
needs: rs-controller-wheels
57+
runs-on: ubuntu-latest
58+
if: github.event_name == 'release'
59+
steps:
60+
- name: Download all wheel artifacts
61+
uses: actions/download-artifact@v4
62+
with:
63+
pattern: rs-controller-wheel-*
64+
merge-multiple: true
65+
path: dist/
66+
- name: Install twine
67+
run: pip install twine
68+
- name: Publish to PyPI
69+
env:
70+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
71+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
72+
run: |
73+
twine upload --verbose dist/*
74+
- name: Wait for PyPI availability
75+
run: |
76+
VERSION=$(echo "$GITHUB_REF" | sed 's|refs/tags/v||')
77+
LINK="https://pypi.org/project/flyte_controller_base/${VERSION}/"
78+
echo "Waiting for $LINK"
79+
for i in $(seq 1 60); do
80+
if curl -L -I -s -f "$LINK"; then
81+
echo "Found on PyPI: $LINK"
82+
exit 0
83+
else
84+
echo "Attempt $i: not yet available, retrying in 10s..."
85+
sleep 10
86+
fi
87+
done
88+
echo "ERROR: timed out waiting for PyPI"
89+
exit 1
90+
1291
flyte-pypi:
1392
name: PyPI package
93+
needs: rs-controller-publish
94+
if: always() && (needs.rs-controller-publish.result == 'success' || needs.rs-controller-publish.result == 'skipped')
1495
runs-on: ubuntu-latest
1596
steps:
1697
- uses: actions/checkout@v4
@@ -27,6 +108,13 @@ jobs:
27108
run: |
28109
uv venv
29110
uv pip install build twine setuptools wheel
111+
- name: Pin flyte_controller_base version (release only)
112+
if: github.event_name == 'release'
113+
run: |
114+
VERSION=$(echo "$GITHUB_REF" | sed 's|refs/tags/v||')
115+
sed -i "s/flyte_controller_base>=2.0.0b0/flyte_controller_base==${VERSION}/" pyproject.toml
116+
echo "Pinned flyte_controller_base==${VERSION}"
117+
grep flyte_controller_base pyproject.toml
30118
- name: Build and publish
31119
run: |
32120
uv run python -m build --wheel --installer uv

README.md

Lines changed: 82 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -129,68 +129,108 @@ pip install flyte[tui]
129129

130130
Flyte 2 is licensed under the [Apache 2.0 License](LICENSE).
131131

132-
## Developing the Core Controller
132+
## Rust Controller (experimental)
133133

134-
Create a separate virtual environment for the Rust controller inside the rs_controller folder. The reason for this is
135-
because the rust controller should be a separate pypi package. The reason it should be a separate pypi package is that
136-
including it into the main SDK as a core component means the entire build toolchain for the SDK will need to become
137-
rust/maturin based. We should probably move to this model in the future though.
134+
The Rust controller is an alternative implementation of the remote controller written in Rust and exposed
135+
to Python via maturin / pyo3. It is distributed as a separate `flyte_controller_base` wheel so the main SDK does
136+
not need to switch its build toolchain to rust/maturin. Keep important dependencies (notably `flyteidl2`)
137+
in lockstep between `pyproject.toml`, `rs_controller/pyproject.toml`, and `rs_controller/Cargo.toml`.
138138

139-
Keep important dependencies the same though, namely flyteidl2.
139+
### Running with the Rust controller
140140

141-
The following instructions are for helping to build the default multi-arch image. Each architecture needs a different wheel. Each wheel needs to be built by a different docker image.
141+
The Rust controller is gated behind the `_F_USE_RUST_CONTROLLER` env var. Set it to `1` (`true` / `yes` are also accepted):
142142

143-
### Setup Builders
144-
`cd` into `rs_controller` and run `make build-builders`. This will build the builder images once, so you can keep using them as the rust code changes.
143+
```bash
144+
_F_USE_RUST_CONTROLLER=1 python examples/basics/hello_v2.py
145+
```
146+
147+
The driver propagates this env var to all sub-task pods, so both the driver and child actions use the
148+
Rust controller for that run.
149+
150+
> **v1 limitations.** The Rust controller currently supports only the legacy
151+
> QueueService + StateService path. Do **not** combine `_F_USE_RUST_CONTROLLER=1` with
152+
> `_U_USE_ACTIONS=1` until ActionsService support lands. Other gaps are tracked as follow-ups:
153+
> abort RPC on cancel, trace-action enqueue, `Code.ABORTED` fast-fail, tunable retries / QPS,
154+
> graceful `stop()`. See PR #675.
155+
156+
> Dev iteration requires the local image builder. The `flyte_controller_base` wheel is not
157+
> on PyPI until release, and the remote image builder installs all wheels in a layer at once,
158+
> so it cannot resolve `flyte_controller_base` from a sibling layer. Use the local image
159+
> builder while developing the Rust controller:
160+
>
161+
> ```yaml
162+
> # .flyte/config.yaml
163+
> image:
164+
> builder: local
165+
> ```
166+
167+
### Developing the Rust controller
168+
169+
#### One-time setup
145170
146-
### Iteration Cycle
147-
Run `make build-wheels` to actually build the multi-arch wheels. This command should probably be updated to build all three,
148-
currently it only builds for linux/amd64 and linux/arm64... the `make build-wheel-local` command builds a macosx wheel,
149-
unclear what the difference is between that and the arm64 one, and unclear if both are present, which one pip chooses.
171+
Build the manylinux builder images. They are cached, so you only need to rebuild them when the
172+
build tooling itself changes:
150173
151-
`cd` back up to the root folder of this project and proceed with
152174
```bash
153-
make dist
154-
python maint_tools/build_default_image.py
175+
cd rs_controller
176+
make build-builders
177+
cd ..
155178
```
156179
157-
To install the wheel locally for testing, use the following command with your venv active.
180+
#### Iteration loop
181+
182+
After every Rust change, run the all-in-one dev target from the repo root:
183+
158184
```bash
159-
uv pip install --find-links ./rs_controller/dist --no-index --force-reinstall --no-deps flyte_controller_base
185+
REGISTRY=<your-registry> make dev-rs-dist
160186
```
161-
Repeat this process to iterate - build new wheels, force reinstall the controller package.
162187
163-
### Build Configuration Summary
188+
`dev-rs-dist` does four things:
164189
165-
In order to support both Rust crate publication and Python wheel distribution, we have
166-
to sometimes use and sometimes not use the 'pyo3/extension-module' feature. To do this, this
167-
project's Cargo.toml itself can toggle this on and off.
190+
1. `cd rs_controller && make build-wheels` — build manylinux x86_64 + aarch64 wheels (use
191+
`make build-wheel-local` if you only need a macOS wheel for the driver).
192+
2. `make dist` — build the main `flyte` SDK wheel.
193+
3. `uv run python maint_tools/build_default_image.py --registry $(REGISTRY)` — build the default
194+
image with both wheels baked in and push it to your registry.
195+
4. `uv pip install --find-links ./rs_controller/dist --no-index --force-reinstall --no-deps flyte_controller_base` —
196+
refresh the wheel in your local venv so the driver picks up the new build.
168197
169-
[features]
170-
default = ["pyo3/auto-initialize"] # For Rust crate users (links to libpython)
171-
extension-module = ["pyo3/extension-module"] # For Python wheels (no libpython linking)
198+
After this, any `flyte.TaskEnvironment` that does not pass an explicit `image=` will resolve to the default
199+
debian image and automatically have the Rust wheel layered in. If you do pass an explicit `image=`, the
200+
auto-bake is skipped; in that case, chain `.with_local_rs_controller()` onto the image to bake the Rust wheel
201+
manually.
172202
173-
The cargo file contains
203+
If you only changed Python (not Rust), you can skip the wheel rebuild and just run `make dist` plus
204+
the image rebuild step (step 3 above). The previously built Rust wheel is reused.
174205
175-
# Cargo.toml
176-
[lib]
177-
crate-type = ["rlib", "cdylib"] # Support both Rust and Python usage
206+
### Build configuration summary
178207
179-
When using 'default', 'auto-initialize' is turned on, which requires linking to libpython, which exists on local Mac so
180-
this works nicely. It is not available in manylinux however, so trying to build with this feature in a manylinux docker
181-
image will fail. But that's okay, because the purpose of the manylinux container is to build wheels,
182-
and for wheels, we need the 'extension-module' feature, which disables linking to libpython.
208+
The Rust crate ships with two cargo features so the same project can produce a Rust rlib and a
209+
Python extension wheel:
183210
184-
The key insight: auto-initialize is for embedding Python in Rust (needs libpython), while
185-
extension-module is for extending Python with Rust (must NOT link libpython for portability).
211+
```toml
212+
[features]
213+
default = ["pyo3/auto-initialize"] # Rust crate users; links libpython
214+
extension-module = ["pyo3/extension-module"] # Python wheels; no libpython linking
215+
216+
[lib]
217+
crate-type = ["rlib", "cdylib"] # Both Rust and Python usage
218+
```
186219
187-
This setup makes it possible to build wheels and also run Rust binaries with `cargo run --bin`.
220+
- `pyo3/auto-initialize` embeds Python into Rust (works locally on macOS, fails inside the manylinux
221+
builder because libpython is unavailable there).
222+
- `pyo3/extension-module` extends Python with Rust (must not link libpython for portable wheels).
223+
224+
So local `cargo run --bin <name>` uses `default` features, and the manylinux builder explicitly
225+
disables defaults and turns on `extension-module`:
226+
227+
```toml
228+
# rs_controller/pyproject.toml
229+
[tool.maturin]
230+
no-default-features = true
231+
features = ["extension-module"]
232+
```
188233
189-
(not sure if this is needed)
190-
# pyproject.toml
191-
[tool.maturin]
192-
features = ["extension-module"] # Tells maturin to use extension-module feature
193-
194234
## Learn More
195235
196236
- **[Live Demo](https://flyte2intro.apps.demo.hosted.unionai.cloud/)** — Try Flyte 2 in your browser

examples/advanced/cancel_tasks.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,8 @@
11
import asyncio
2-
from pathlib import Path
32

4-
import flyte
53
import flyte.errors
6-
from flyte._image import PythonWheels
74

8-
controller_dist_folder = Path("/Users/ytong/go/src/github.com/flyteorg/sdk-rust/rs_controller/dist")
9-
wheel_layer = PythonWheels(wheel_dir=controller_dist_folder, package_name="flyte_controller_base")
10-
base = flyte.Image.from_debian_base()
11-
rs_controller_image = base.clone(addl_layer=wheel_layer)
12-
13-
14-
env = flyte.TaskEnvironment("cancel", image=rs_controller_image)
5+
env = flyte.TaskEnvironment("cancel")
156

167

178
@env.task

examples/basics/devbox_one.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,12 @@
11
import asyncio
22
import logging
3-
from pathlib import Path
43
from typing import List
54

65
import flyte
7-
from flyte._image import PythonWheels
8-
9-
controller_dist_folder = Path("/Users/ytong/go/src/github.com/flyteorg/sdk-rust/rs_controller/dist")
10-
wheel_layer = PythonWheels(wheel_dir=controller_dist_folder, package_name="flyte_controller_base")
11-
base = flyte.Image.from_debian_base()
12-
rs_controller_image = base.clone(addl_layer=wheel_layer)
136

147
env = flyte.TaskEnvironment(
158
name="hello_world",
169
resources=flyte.Resources(cpu=1, memory="1Gi"),
17-
image=rs_controller_image,
1810
)
1911

2012

examples/basics/hello.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,9 @@
1-
from pathlib import Path
2-
31
import flyte
4-
from flyte._image import PythonWheels
5-
6-
controller_dist_folder = Path("/Users/ytong/go/src/github.com/flyteorg/sdk-rust/rs_controller/dist")
7-
wheel_layer = PythonWheels(wheel_dir=controller_dist_folder, package_name="flyte_controller_base")
8-
base = flyte.Image.from_debian_base()
9-
rs_controller_image = base.clone(addl_layer=wheel_layer)
102

113
# TaskEnvironments provide a simple way of grouping configuration used by tasks (more later).
124
env = flyte.TaskEnvironment(
135
name="hello_world",
146
resources=flyte.Resources(memory="250Mi"),
15-
image=rs_controller_image,
167
)
178

189

0 commit comments

Comments
 (0)