diff --git a/.dstack/workflows/conda.yaml b/.dstack/workflows/conda.yaml
index a95d960..a8448e1 100644
--- a/.dstack/workflows/conda.yaml
+++ b/.dstack/workflows/conda.yaml
@@ -21,3 +21,14 @@ workflows:
     commands:
       - conda activate myenv
       - python usage/conda/hello_pandas.py
+
+  - name: xgboost-env
+    help: "This workflow prepares the xgboost-env Conda environment with xgboost installed."
+    commands:
+      - conda env create --file usage/conda/xgboost.yaml
+      - conda activate xgboost-env
+      - conda install -y scikit-learn
+      - conda install -y xgboost
+    provider: bash
+    artifacts:
+      - path: /opt/conda/envs/xgboost-env
diff --git a/.dstack/workflows/xgboost.yaml b/.dstack/workflows/xgboost.yaml
new file mode 100644
index 0000000..48b56f9
--- /dev/null
+++ b/.dstack/workflows/xgboost.yaml
@@ -0,0 +1,8 @@
+workflows:
+  - name: xgboost
+    provider: bash
+    deps:
+      - workflow: xgboost-env
+    commands:
+      - conda activate xgboost-env
+      - python examples/xgboost/train.py ${{ run.args }}
diff --git a/examples/xgboost/train.py b/examples/xgboost/train.py
new file mode 100644
index 0000000..1b12330
--- /dev/null
+++ b/examples/xgboost/train.py
@@ -0,0 +1,55 @@
+"""Train an XGBoost classifier on the Iris dataset and print test metrics."""
+
+import argparse
+
+from sklearn.datasets import load_iris
+from sklearn import metrics, model_selection
+import xgboost as xgb
+
+
+def parse_args():
+    """Parse command-line arguments and return them as a dict."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--learning-rate",
+        default=0.2,
+        type=float,
+        help="Learning rate passed to XGBoost (default: 0.2).",
+    )
+    return vars(parser.parse_args())
+
+
+def main():
+    """Train on a 90/10 split of Iris, then print log loss and accuracy."""
+    args = parse_args()
+    print(args)
+
+    iris = load_iris()
+    data, labels = iris.data, iris.target
+    num_class = len(iris.target_names)
+    data_train, data_test, labels_train, labels_test = model_selection.train_test_split(
+        data, labels, test_size=0.1, random_state=2023)
+
+    data_train = xgb.DMatrix(data_train, label=labels_train)
+    data_test = xgb.DMatrix(data_test, label=labels_test)
+    params = {
+        "learning_rate": args["learning_rate"],
+        "objective": "multi:softprob",
+        "seed": 2023,
+        "num_class": num_class,
+    }
+    model = xgb.train(params, data_train, evals=[(data_train, "train")])
+
+    # multi:softprob yields one probability per class for each row.
+    y_proba = model.predict(data_test)
+    y_pred = y_proba.argmax(axis=1)
+    # Pass the full label set so log_loss still works if the small test
+    # split happens to miss a class.
+    loss = metrics.log_loss(labels_test, y_proba, labels=list(range(num_class)))
+    acc = metrics.accuracy_score(labels_test, y_pred)
+
+    print(f"Model trained: loss={loss:.2f}, acc={acc:.2f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/usage/conda/xgboost.yaml b/usage/conda/xgboost.yaml
new file mode 100644
index 0000000..e310987
--- /dev/null
+++ b/usage/conda/xgboost.yaml
@@ -0,0 +1,4 @@
+name: xgboost-env
+
+dependencies:
+  - python=3.10