From 9ba4e58af63892057f781f4509aef05ea3bc6cd1 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 13 Aug 2025 18:39:39 -0400 Subject: [PATCH 01/36] working --- .../datarobot_drum/drum/description.py | 2 +- .../datarobot_drum/drum/drum.py | 10 +- .../datarobot_drum/drum/main.py | 9 +- .../drum/root_predictors/drum_inline_utils.py | 4 +- .../drum/root_predictors/prediction_server.py | 9 +- .../datarobot_drum/drum/runtime.py | 15 +- .../datarobot_drum/drum/server.py | 5 +- .../python311_genai_agents/run_agent.py | 3 + .../nim_sidecar/Dockerfile | 8 +- .../nim_sidecar/app.py | 32 ++++ .../nim_sidecar/env_info.json | 2 +- .../nim_sidecar/requirements.in | 1 + .../nim_sidecar/requirements.txt | 150 +++++++++--------- .../nim_sidecar/start_server.sh | 13 +- public_dropin_nim_environments/rr | 12 ++ 15 files changed, 171 insertions(+), 104 deletions(-) create mode 100644 public_dropin_nim_environments/nim_sidecar/app.py create mode 100644 public_dropin_nim_environments/rr diff --git a/custom_model_runner/datarobot_drum/drum/description.py b/custom_model_runner/datarobot_drum/drum/description.py index 606c506a1..d0d3b6800 100644 --- a/custom_model_runner/datarobot_drum/drum/description.py +++ b/custom_model_runner/datarobot_drum/drum/description.py @@ -4,6 +4,6 @@ This is proprietary source code of DataRobot, Inc. and its affiliates. Released under the terms of DataRobot Tool and Utility Agreement. 
""" -version = "1.16.20" +version = "1.16.21" __version__ = version project_name = "datarobot-drum" diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index 37df6da5e..08e94b5a9 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -461,7 +461,7 @@ def raise_multiple_custom_files(py_paths, r_paths, jl_paths): run_language = RunLanguage.PYTHON return run_language - def run(self): + def run(self, app): try: if self.options.docker and ( self.run_mode not in (RunMode.PUSH, RunMode.PERF_TEST, RunMode.VALIDATION) @@ -495,14 +495,14 @@ def run(self): stats_collector.enable() try: with self._setup_output_if_not_exists(): - self._run_predictions(stats_collector) + self._run_predictions(app, stats_collector) finally: if stats_collector: stats_collector.disable() if stats_collector: stats_collector.print_reports() elif self.run_mode == RunMode.SERVER: - self._run_predictions() + self._run_predictions(app) elif self.run_mode == RunMode.FIT: self.run_fit() elif self.run_mode == RunMode.PERF_TEST: @@ -812,7 +812,7 @@ def get_predictor_params(self): raise DrumCommonException("Arguments are missing in the pipeline") return pipeline["pipe"][0]["arguments"] - def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): + def _run_predictions(self, app, stats_collector: Optional[StatsCollector] = None): if self.run_mode not in [RunMode.SCORE, RunMode.SERVER]: raise NotImplemented(f"The given run mode is supported here: {self.run_mode}") @@ -832,7 +832,7 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): ) if stats_collector: stats_collector.mark("init") - predictor.materialize() + predictor.materialize(app) if stats_collector: stats_collector.mark("run") finally: diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index d6384ec2f..9f18ad5f8 100644 --- 
a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -59,8 +59,9 @@ ) -def main(): - with DrumRuntime() as runtime: +def main(app, app1): + print(sys.argv) + with DrumRuntime(app1) as runtime: config_logging() def signal_handler(sig, frame): @@ -116,10 +117,10 @@ def signal_handler(sig, frame): try: runtime.cm_runner = CMRunner(runtime) - runtime.cm_runner.run() + runtime.cm_runner.run(app) except DrumSchemaValidationException: sys.exit(ExitCodes.SCHEMA_VALIDATION_ERROR.value) if __name__ == "__main__": - main() + pass diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py index 2d94fce9c..74704ed47 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py @@ -36,7 +36,7 @@ @contextlib.contextmanager def drum_inline_predictor( - target_type: str, custom_model_dir: str, target_name: str, *cmd_args: List[str] + target_type: str, custom_model_dir: str, target_name: str, app, *cmd_args: List[str] ) -> Generator[BaseLanguagePredictor, None, None]: """ Drum run for a custom model code definition. Yields a predictor, ready to work with. 
@@ -48,7 +48,7 @@ def drum_inline_predictor( :param cmd_args: Extra command line arguments :return: """ - with DrumRuntime() as runtime, tempfile.NamedTemporaryFile(mode="wb") as tf: + with DrumRuntime(app) as runtime, tempfile.NamedTemporaryFile(mode="wb") as tf: # setup os.environ["TARGET_NAME"] = target_name diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index d2a6b3d5e..61d4113f5 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -136,7 +136,7 @@ def _post_predict_and_transform(self): self._stats_collector.disable() self._stdout_flusher.set_last_activity_time() - def materialize(self): + def materialize(self, app): model_api = base_api_blueprint(self._terminate, self._predictor) @model_api.route("/capabilities/", methods=["GET"]) @@ -283,7 +283,7 @@ def handle_exception(e): cli = sys.modules["flask.cli"] cli.show_server_banner = lambda *x: None - app = get_flask_app(model_api) + app = get_flask_app(model_api, app) self.load_flask_extensions(app) self._run_flask_app(app) @@ -301,7 +301,10 @@ def _run_flask_app(self, app): processes = self._params.get("processes") logger.info("Number of webserver processes: %s", processes) try: - app.run(host, port, threaded=False, processes=processes) + pass + #logger.info("hhhhhhhhh Host and port: %s", host + port) + print(f"hhhhhhhhh{host}, {port}") + #app.run(host, port, threaded=False, processes=processes) except OSError as e: raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port)) diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index ade7545ac..f13ecb075 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -26,11 +26,12 @@ 
class DrumRuntime: - def __init__(self): + def __init__(self, app): self.initialization_succeeded = False self.options = None self.cm_runner = None self.trace_provider = None + self.app = app def __enter__(self): return self @@ -83,12 +84,12 @@ def __exit__(self, exc_type, exc_value, exc_traceback): port = int(host_port_list[1]) if len(host_port_list) == 2 else None with verbose_stdout(self.options.verbose): - run_error_server(host, port, exc_value) + run_error_server(host, port, exc_value, self.app) return False # propagate exception further -def run_error_server(host, port, exc_value): +def run_error_server(host, port, exc_value, app): model_api = empty_api_blueprint() @model_api.route("/", methods=["GET"]) @@ -108,6 +109,10 @@ def predict(): @model_api.route("/transform/", methods=["POST"]) def transform(): return {"message": "ERROR: {}".format(exc_value)}, HTTP_513_DRUM_PIPELINE_ERROR - - app = get_flask_app(model_api) + print("1rrrrrrr") + print("1rrrrrrr") + print("2rrrrrrr") + print("2rrrrrrr") + print(f"rrrrrrrr{host}, {port}") + app = get_flask_app(model_api, app) app.run(host, port) diff --git a/custom_model_runner/datarobot_drum/drum/server.py b/custom_model_runner/datarobot_drum/drum/server.py index 1fd30e9b5..2b02b5eb0 100644 --- a/custom_model_runner/datarobot_drum/drum/server.py +++ b/custom_model_runner/datarobot_drum/drum/server.py @@ -29,8 +29,7 @@ logger = get_drum_logger(LOGGER_NAME_PREFIX) -def get_flask_app(api_blueprint): - app = create_flask_app() +def get_flask_app(api_blueprint, app): url_prefix = os.environ.get(URL_PREFIX_ENV_VAR_NAME, "") app.register_blueprint(api_blueprint, url_prefix=url_prefix) return app @@ -94,6 +93,4 @@ def after_request(response): def create_flask_app(): flask_app = Flask(__name__) - flask_app.before_request(before_request) - flask_app.after_request(after_request) return flask_app diff --git a/public_dropin_environments/python311_genai_agents/run_agent.py 
b/public_dropin_environments/python311_genai_agents/run_agent.py index a645e22b6..cf99d2828 100644 --- a/public_dropin_environments/python311_genai_agents/run_agent.py +++ b/public_dropin_environments/python311_genai_agents/run_agent.py @@ -255,6 +255,7 @@ def execute_drum( def execute_drum_inline( chat_completion: CompletionCreateParamsBase, custom_model_dir: Path, + app ) -> ChatCompletion: root.info("Executing agent as [chat] endpoint. DRUM Inline Executor.") @@ -263,6 +264,7 @@ def execute_drum_inline( target_type=TargetType.AGENTIC_WORKFLOW.value, custom_model_dir=custom_model_dir, target_name="response", + app ) as predictor: root.info("Executing Agent") completion = predictor.chat(chat_completion) @@ -308,6 +310,7 @@ def run_agent_procedure(args: Any) -> None: result = execute_drum_inline( chat_completion=chat_completion, custom_model_dir=args.custom_model_dir, + app, ) else: result = execute_drum( diff --git a/public_dropin_nim_environments/nim_sidecar/Dockerfile b/public_dropin_nim_environments/nim_sidecar/Dockerfile index 75a314df3..16774a254 100644 --- a/public_dropin_nim_environments/nim_sidecar/Dockerfile +++ b/public_dropin_nim_environments/nim_sidecar/Dockerfile @@ -1,6 +1,6 @@ # This is a private chain-guard development image that is stored in DataRobot's private registry. # Replace it with your own development chain-gaurd image if you build your own. -FROM datarobotdev/mirror_chainguard_datarobot.com_python-fips:3.11-dev as build +FROM datarobotdev/mirror_chainguard_datarobot.com_python-fips:3.11-dev AS build ENV VIRTUAL_ENV=/opt/venv USER root @@ -9,7 +9,8 @@ RUN python -m venv ${VIRTUAL_ENV} && \ COPY requirements.txt requirements.txt RUN ${VIRTUAL_ENV}/bin/python -m pip install -r requirements.txt - +COPY datarobot_drum-1.16.21-py3-none-any.whl /tmp/ +RUN ${VIRTUAL_ENV}/bin/python -m pip install /tmp/datarobot_drum-1.16.21-py3-none-any.whl # This is a private production chain-guard image that is stored in DataRobot's private registry. 
# Replace it with your own production chain-gaurd image if you build your own. @@ -45,6 +46,9 @@ ENV CODE_DIR=/opt/code ENV ADDRESS=0.0.0.0:8080 ENV WITH_ERROR_SERVER=1 +RUN mkdir -p ${CODE_DIR} \ + && chmod a+rwX ${CODE_DIR} + # This makes print statements show up in the logs API ENV PYTHONUNBUFFERED=1 diff --git a/public_dropin_nim_environments/nim_sidecar/app.py b/public_dropin_nim_environments/nim_sidecar/app.py new file mode 100644 index 000000000..b37d28997 --- /dev/null +++ b/public_dropin_nim_environments/nim_sidecar/app.py @@ -0,0 +1,32 @@ +# Import DRUM's WSGI application +import sys + +from datarobot_drum.drum.main import main +from datarobot_drum.drum.server import create_flask_app + +# You may need to configure the app with the same settings +# that would normally be set by command line arguments +import os +#os.environ["DRUM_SIDECAR"] = "true" +#os.environ["DRUM_GPU_PREDICTOR"] = "nim" +'''os.environ["TARGET_TYPE"] = "textgeneration" +os.environ["ALLOW_DR_API_ACCESS_FOR_ALL_CUSTOM_MODELS"] = "true" +os.environ["ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP"] = "0" +os.environ["EXTERNAL_WEB_SERVER_URL"] = "http://127.0.0.1" +os.environ["DATAROBOT_ENDPOINT"] = "http://127.0.0.1/api/v2" +os.environ["MLOPS_DEPLOYMENT_ID"] = "a2fde18c5458caba0267c" +os.environ["MLOPS_MODEL_ID"] = "689a2fae18c5458caba02677" +os.environ["TARGET_NAME"] = "resultText" +os.environ["API_TOKEN"] = "resultText"''' + + +sys.argv = [ + "drum","server", # Program name + "--sidecar","--gpu-predictor=nim", "--logging-level=info" +] +import traceback + +app = create_flask_app() +app2 = create_flask_app() +main(app, app) + diff --git a/public_dropin_nim_environments/nim_sidecar/env_info.json b/public_dropin_nim_environments/nim_sidecar/env_info.json index 5b7319fd1..eedbed5c5 100644 --- a/public_dropin_nim_environments/nim_sidecar/env_info.json +++ b/public_dropin_nim_environments/nim_sidecar/env_info.json @@ -4,7 +4,7 @@ "description": "", "programmingLanguage": "python", "label": "", - 
"environmentVersionId": "6848a84e0c7c49131250fdd4", + "environmentVersionId": "6848a84e0c7c49131250ffd4", "environmentVersionDescription": "Run with 10 HTTP workers by default", "isPublic": true, "useCases": [ diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.in b/public_dropin_nim_environments/nim_sidecar/requirements.in index 598d987b5..ade32e2e3 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.in +++ b/public_dropin_nim_environments/nim_sidecar/requirements.in @@ -3,4 +3,5 @@ datarobot-mlops datarobot-mlops-connected-client datarobot-drum openai>=1.17.0 +gunicorn opentelemetry-instrumentation-openai diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index 71133d457..ca80800cd 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -4,113 +4,113 @@ # # pip-compile --index-url=https://pypi.org/simple --no-annotate --no-emit-index-url --no-emit-trusted-host --output-file=requirements.txt requirements.in # -aiohappyeyeballs==2.4.6 -aiohttp==3.11.13 -aiosignal==1.3.2 +aiohappyeyeballs==2.6.1 +aiohttp==3.12.15 +aiosignal==1.4.0 annotated-types==0.7.0 -anyio==4.8.0 -argcomplete==3.5.3 +anyio==4.10.0 +argcomplete==3.6.2 async-timeout==5.0.1 -attrs==25.1.0 -azure-core==1.32.0 -azure-identity==1.20.0 +attrs==25.3.0 +azure-core==1.35.0 +azure-identity==1.24.0 azure-storage-blob==12.19.0 -backoff==2.2.1 blinker==1.9.0 -boto3==1.37.1 -botocore==1.37.1 -cachetools==4.2.4 -certifi==2025.1.31 +boto3==1.40.7 +botocore==1.40.7 +cachetools==5.5.2 +certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.1 -click==8.1.8 -cryptography==44.0.1 -datarobot==3.6.3 -datarobot-drum==1.16.17 -datarobot-mlops==11.1.0a3 -datarobot-mlops-connected-client==11.1.0a3 -datarobot-storage==0.0.0 -deprecated==1.2.18 +charset-normalizer==3.4.3 +click==8.2.1 +cryptography==45.0.6 +datarobot==3.8.2 
+datarobot-mlops==11.1.0 +datarobot-mlops-connected-client==11.1.0 +datarobot-storage==2.2.0 distro==1.9.0 docker==7.1.0 -exceptiongroup==1.2.2 +exceptiongroup==1.3.0 filechunkio==1.8 -flask==3.1.0 -frozenlist==1.5.0 -google-api-core==1.34.0 -google-auth==1.28.1 -google-cloud-core==2.4.2 -google-cloud-storage==1.43.0 -google-crc32c==1.6.0 +flask==3.1.1 +frozenlist==1.7.0 +google-api-core==2.25.1 +google-auth==2.40.3 +google-cloud-core==2.4.3 +google-cloud-storage==2.19.0 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.68.0 -h11==0.14.0 -httpcore==1.0.7 +googleapis-common-protos==1.70.0 +gunicorn==23.0.0 +h11==0.16.0 +httpcore==1.0.9 httpx==0.28.1 idna==3.10 +importlib-metadata==8.7.0 isodate==0.7.2 itsdangerous==2.2.0 -jinja2==3.1.5 -jiter==0.8.2 +jinja2==3.1.6 +jiter==0.10.0 jmespath==1.0.1 julia==0.5.7 markupsafe==3.0.2 memory-profiler==0.61.0 -msal==1.31.1 -msal-extensions==1.2.0 -multidict==6.1.0 -mypy-extensions==1.0.0 -numpy==2.0.2 -openai==1.64.0 -opentelemetry-api==1.16.0 -opentelemetry-exporter-otlp-proto-http==1.16.0 -opentelemetry-instrumentation-aiohttp-client==0.37b0 -opentelemetry-instrumentation-openai==0.37b0 -opentelemetry-instrumentation-requests==0.37b0 -opentelemetry-instrumentation==0.37b0 -opentelemetry-proto==1.16.0 -opentelemetry-sdk==1.16.0 -opentelemetry-semantic-conventions==0.37b0 -opentelemetry-util-http==0.37b0 -orjson==3.10.15 -packaging==24.2 -pandas==2.2.3 -pillow==11.1.0 -portalocker==2.10.1 -progress==1.6 -propcache==0.3.0 -protobuf==3.20.3 +msal==1.33.0 +msal-extensions==1.3.1 +multidict==6.6.4 +mypy-extensions==1.1.0 +numpy==2.2.6 +openai==1.99.9 +opentelemetry-api==1.36.0 +opentelemetry-exporter-otlp-proto-common==1.36.0 +opentelemetry-exporter-otlp-proto-http==1.36.0 +opentelemetry-instrumentation==0.57b0 +opentelemetry-instrumentation-aiohttp-client==0.57b0 +opentelemetry-instrumentation-openai==0.44.2 +opentelemetry-instrumentation-requests==0.57b0 +opentelemetry-proto==1.36.0 
+opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opentelemetry-semantic-conventions-ai==0.4.11 +opentelemetry-util-http==0.57b0 +orjson==3.11.1 +packaging==25.0 +pandas==2.3.1 +pillow==11.3.0 +progress==1.6.1 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==6.31.1 psutil==7.0.0 py4j==0.10.9.9 pyasn1==0.6.1 -pyasn1-modules==0.4.1 +pyasn1-modules==0.4.2 pycparser==2.22 -pydantic==2.10.6 -pydantic-core==2.27.2 +pydantic==2.11.7 +pydantic-core==2.33.2 pyjwt[crypto]==2.10.1 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 pyyaml==6.0.2 -requests==2.32.3 +requests==2.32.4 requests-toolbelt==1.0.0 -rsa==4.9 +rsa==4.9.1 ruamel-yaml==0.17.4 -s3transfer==0.11.2 -scipy==1.13.1 +s3transfer==0.13.1 +scipy==1.15.3 six==1.17.0 sniffio==1.3.1 strenum==0.4.15 strictyaml==1.4.2 -termcolor==2.5.0 +termcolor==3.1.0 texttable==1.7.0 tqdm==4.67.1 trafaret==2.1.1 -typing-extensions==4.12.2 -tzdata==2025.1 -urllib3==1.26.20 +typing-extensions==4.14.1 +typing-inspection==0.4.1 +tzdata==2025.2 +urllib3==2.5.0 werkzeug==3.1.3 wrapt==1.17.2 -yarl==1.18.3 - -# The following packages are considered to be unsafe in a requirements file: -# setuptools +yarl==1.20.1 +zipp==3.23.0 diff --git a/public_dropin_nim_environments/nim_sidecar/start_server.sh b/public_dropin_nim_environments/nim_sidecar/start_server.sh index 82a2ffa69..72e7efba4 100755 --- a/public_dropin_nim_environments/nim_sidecar/start_server.sh +++ b/public_dropin_nim_environments/nim_sidecar/start_server.sh @@ -8,7 +8,6 @@ echo "Starting Custom Model environment with NIM" set -e - if [ "${ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP}" = 1 ]; then echo "Environment variables:" env @@ -17,4 +16,14 @@ fi echo echo "Starting DRUM server..." 
echo -exec drum server --sidecar --gpu-predictor=nim --logging-level=info "$@" + +#exec gunicorn app:app --workers=4 --bind=0.0.0.0:8080 --backlog=4 & OK 20 Workers 30 parallel request 8 mb +#exec gunicorn app:app --workers=4 --bind=0.0.0.0:8080 --backlog=32 & probably ok +#exec gunicorn app:app --workers=32 --bind=0.0.0.0:8080 --backlog=512 & #failed (stopped) +#exec gunicorn app:app --workers=32 --bind=0.0.0.0:8080 --backlog=256 & #failing... +exec gunicorn app:app -k gthread --workers=32 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 --max-requests 1000 --max-requests-jitter 100 & #works +#exec gunicorn app:app -k gthread --workers=128 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 & #failed +#exec gunicorn app:app2 --workers=1 --bind=0.0.0.0:8080 & + +# Wait for both processes +wait \ No newline at end of file diff --git a/public_dropin_nim_environments/rr b/public_dropin_nim_environments/rr new file mode 100644 index 000000000..2d62682e1 --- /dev/null +++ b/public_dropin_nim_environments/rr @@ -0,0 +1,12 @@ + python3 ./tools/migrate_templates.py --admin-api-token Njg4ZDkxMWQ5OThlNGQ0MDVmMmVkZjU3OmI3VEduMlNuM2kzMlZURm5CK1hpbEhQSlUyWi8wZS9uMUlZQ08yaDMzVkE9 --dr-host https://datarobot.apps.datarobot.datarobot.oci-dev.datarobot.com/ --template-types models --no-update-exec-envs --force --sleep-seconds 0 + + docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . + +cd custom_model_runner +make wheel + +workon drum +docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . + docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . 
+ + From 209c4f9a18fed40a467d91d829cc51defe888b85 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 20 Aug 2025 02:05:20 -0400 Subject: [PATCH 02/36] fixed --- .../datarobot_drum/drum/drum.py | 15 +++++++------- .../datarobot_drum/drum/main.py | 9 ++++----- .../drum/root_predictors/prediction_server.py | 15 +++++++------- .../datarobot_drum/drum/runtime.py | 11 +++++----- .../datarobot_drum/drum/server.py | 2 ++ .../nim_sidecar/app.py | 20 +------------------ .../nim_sidecar/start_server.sh | 10 +--------- 7 files changed, 29 insertions(+), 53 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index 08e94b5a9..5bb5d0960 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -77,8 +77,9 @@ class CMRunner: - def __init__(self, runtime): + def __init__(self, runtime, app=None): self.runtime = runtime + self.app = app self.options = runtime.options self.options.model_config = read_model_metadata_yaml(self.options.code_dir) self.options.default_parameter_values = ( @@ -461,7 +462,7 @@ def raise_multiple_custom_files(py_paths, r_paths, jl_paths): run_language = RunLanguage.PYTHON return run_language - def run(self, app): + def run(self): try: if self.options.docker and ( self.run_mode not in (RunMode.PUSH, RunMode.PERF_TEST, RunMode.VALIDATION) @@ -495,14 +496,14 @@ def run(self, app): stats_collector.enable() try: with self._setup_output_if_not_exists(): - self._run_predictions(app, stats_collector) + self._run_predictions(stats_collector) finally: if stats_collector: stats_collector.disable() if stats_collector: stats_collector.print_reports() elif self.run_mode == RunMode.SERVER: - self._run_predictions(app) + self._run_predictions() elif self.run_mode == RunMode.FIT: self.run_fit() elif self.run_mode == RunMode.PERF_TEST: @@ -812,7 +813,7 @@ def get_predictor_params(self): raise DrumCommonException("Arguments are missing in 
the pipeline") return pipeline["pipe"][0]["arguments"] - def _run_predictions(self, app, stats_collector: Optional[StatsCollector] = None): + def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): if self.run_mode not in [RunMode.SCORE, RunMode.SERVER]: raise NotImplemented(f"The given run mode is supported here: {self.run_mode}") @@ -826,13 +827,13 @@ def _run_predictions(self, app, stats_collector: Optional[StatsCollector] = None if stats_collector: stats_collector.mark("start") predictor = ( - PredictionServer(params) + PredictionServer(params, self.app) if self.run_mode == RunMode.SERVER else GenericPredictorComponent(params) ) if stats_collector: stats_collector.mark("init") - predictor.materialize(app) + predictor.materialize() if stats_collector: stats_collector.mark("run") finally: diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 9f18ad5f8..e93c627a0 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -59,9 +59,8 @@ ) -def main(app, app1): - print(sys.argv) - with DrumRuntime(app1) as runtime: +def main(app): + with DrumRuntime(app) as runtime: config_logging() def signal_handler(sig, frame): @@ -116,8 +115,8 @@ def signal_handler(sig, frame): from datarobot_drum.drum.drum import CMRunner try: - runtime.cm_runner = CMRunner(runtime) - runtime.cm_runner.run(app) + runtime.cm_runner = CMRunner(runtime, app) + runtime.cm_runner.run() except DrumSchemaValidationException: sys.exit(ExitCodes.SCHEMA_VALIDATION_ERROR.value) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 61d4113f5..442070a17 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -53,8 +53,9 @@ class 
PredictionServer(PredictMixin): - def __init__(self, params: dict): + def __init__(self, params: dict, app=None): self._params = params + self.app = app self._show_perf = self._params.get("show_perf") self._resource_monitor = ResourceMonitor(monitor_current_process=True) self._run_language = RunLanguage(params.get("run_language")) @@ -136,7 +137,7 @@ def _post_predict_and_transform(self): self._stats_collector.disable() self._stdout_flusher.set_last_activity_time() - def materialize(self, app): + def materialize(self): model_api = base_api_blueprint(self._terminate, self._predictor) @model_api.route("/capabilities/", methods=["GET"]) @@ -283,7 +284,7 @@ def handle_exception(e): cli = sys.modules["flask.cli"] cli.show_server_banner = lambda *x: None - app = get_flask_app(model_api, app) + app = get_flask_app(model_api, self.app) self.load_flask_extensions(app) self._run_flask_app(app) @@ -301,10 +302,10 @@ def _run_flask_app(self, app): processes = self._params.get("processes") logger.info("Number of webserver processes: %s", processes) try: - pass - #logger.info("hhhhhhhhh Host and port: %s", host + port) - print(f"hhhhhhhhh{host}, {port}") - #app.run(host, port, threaded=False, processes=processes) + if self.app: + pass + else: + app.run(host, port, threaded=False, processes=processes) except OSError as e: raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port)) diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index f13ecb075..d0814cd3e 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -109,10 +109,9 @@ def predict(): @model_api.route("/transform/", methods=["POST"]) def transform(): return {"message": "ERROR: {}".format(exc_value)}, HTTP_513_DRUM_PIPELINE_ERROR - print("1rrrrrrr") - print("1rrrrrrr") - print("2rrrrrrr") - print("2rrrrrrr") print(f"rrrrrrrr{host}, {port}") - app = get_flask_app(model_api, 
app) - app.run(host, port) + if app: + pass + else: + app = get_flask_app(model_api, app) + app.run(host, port) diff --git a/custom_model_runner/datarobot_drum/drum/server.py b/custom_model_runner/datarobot_drum/drum/server.py index 2b02b5eb0..029271aa3 100644 --- a/custom_model_runner/datarobot_drum/drum/server.py +++ b/custom_model_runner/datarobot_drum/drum/server.py @@ -93,4 +93,6 @@ def after_request(response): def create_flask_app(): flask_app = Flask(__name__) + flask_app.before_request(before_request) + flask_app.after_request(after_request) return flask_app diff --git a/public_dropin_nim_environments/nim_sidecar/app.py b/public_dropin_nim_environments/nim_sidecar/app.py index b37d28997..1ebe96cd4 100644 --- a/public_dropin_nim_environments/nim_sidecar/app.py +++ b/public_dropin_nim_environments/nim_sidecar/app.py @@ -4,29 +4,11 @@ from datarobot_drum.drum.main import main from datarobot_drum.drum.server import create_flask_app -# You may need to configure the app with the same settings -# that would normally be set by command line arguments -import os -#os.environ["DRUM_SIDECAR"] = "true" -#os.environ["DRUM_GPU_PREDICTOR"] = "nim" -'''os.environ["TARGET_TYPE"] = "textgeneration" -os.environ["ALLOW_DR_API_ACCESS_FOR_ALL_CUSTOM_MODELS"] = "true" -os.environ["ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP"] = "0" -os.environ["EXTERNAL_WEB_SERVER_URL"] = "http://127.0.0.1" -os.environ["DATAROBOT_ENDPOINT"] = "http://127.0.0.1/api/v2" -os.environ["MLOPS_DEPLOYMENT_ID"] = "a2fde18c5458caba0267c" -os.environ["MLOPS_MODEL_ID"] = "689a2fae18c5458caba02677" -os.environ["TARGET_NAME"] = "resultText" -os.environ["API_TOKEN"] = "resultText"''' - - sys.argv = [ "drum","server", # Program name "--sidecar","--gpu-predictor=nim", "--logging-level=info" ] -import traceback app = create_flask_app() -app2 = create_flask_app() -main(app, app) +main(app) diff --git a/public_dropin_nim_environments/nim_sidecar/start_server.sh b/public_dropin_nim_environments/nim_sidecar/start_server.sh 
index 72e7efba4..fae52f21d 100755 --- a/public_dropin_nim_environments/nim_sidecar/start_server.sh +++ b/public_dropin_nim_environments/nim_sidecar/start_server.sh @@ -17,13 +17,5 @@ echo echo "Starting DRUM server..." echo -#exec gunicorn app:app --workers=4 --bind=0.0.0.0:8080 --backlog=4 & OK 20 Workers 30 parallel request 8 mb -#exec gunicorn app:app --workers=4 --bind=0.0.0.0:8080 --backlog=32 & probably ok -#exec gunicorn app:app --workers=32 --bind=0.0.0.0:8080 --backlog=512 & #failed (stopped) -#exec gunicorn app:app --workers=32 --bind=0.0.0.0:8080 --backlog=256 & #failing... -exec gunicorn app:app -k gthread --workers=32 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 --max-requests 1000 --max-requests-jitter 100 & #works -#exec gunicorn app:app -k gthread --workers=128 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 & #failed -#exec gunicorn app:app2 --workers=1 --bind=0.0.0.0:8080 & - -# Wait for both processes +exec gunicorn app:app -k gthread --workers=8 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 --max-requests 1000 --max-requests-jitter 100 --log-level=info --access-logfile - --access-logformat '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'& #works wait \ No newline at end of file From 1251e1adcbcaa205ed274f2a2cd9aa5bc1949d01 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 20 Aug 2025 02:06:47 -0400 Subject: [PATCH 03/36] removed rr --- public_dropin_nim_environments/rr | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 public_dropin_nim_environments/rr diff --git a/public_dropin_nim_environments/rr b/public_dropin_nim_environments/rr deleted file mode 100644 index 2d62682e1..000000000 --- a/public_dropin_nim_environments/rr +++ /dev/null @@ -1,12 +0,0 @@ - python3 ./tools/migrate_templates.py --admin-api-token Njg4ZDkxMWQ5OThlNGQ0MDVmMmVkZjU3OmI3VEduMlNuM2kzMlZURm5CK1hpbEhQSlUyWi8wZS9uMUlZQ08yaDMzVkE9 --dr-host 
https://datarobot.apps.datarobot.datarobot.oci-dev.datarobot.com/ --template-types models --no-update-exec-envs --force --sleep-seconds 0 - - docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . - -cd custom_model_runner -make wheel - -workon drum -docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . - docker buildx build --platform linux/amd64 -t sergeygavrenkov123/drum_gunicorn:latest --push . - - From 7a2b0bc0ab0140bc1a875f75faecc78ed8bd3070 Mon Sep 17 00:00:00 2001 From: svc-harness-git2 Date: Wed, 20 Aug 2025 06:12:44 +0000 Subject: [PATCH 04/36] Reconcile dependencies, updated IDs, tags --- public_dropin_nim_environments/nim_sidecar/env_info.json | 7 ++++--- .../nim_sidecar/requirements.txt | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/public_dropin_nim_environments/nim_sidecar/env_info.json b/public_dropin_nim_environments/nim_sidecar/env_info.json index eedbed5c5..a466cea9c 100644 --- a/public_dropin_nim_environments/nim_sidecar/env_info.json +++ b/public_dropin_nim_environments/nim_sidecar/env_info.json @@ -7,14 +7,15 @@ "environmentVersionId": "6848a84e0c7c49131250ffd4", "environmentVersionDescription": "Run with 10 HTTP workers by default", "isPublic": true, + "isDownloadable": true, "useCases": [ "customModel" ], "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_nim_environments/nim_sidecar", "imageRepository": "env-nim-sidecar", "tags": [ - "v11.1.0-6848b6572081a81ac56c7a0b", - "6848b6572081a81ac56c7a0b", - "v11.1.0-latest" + "v11.2.0-6848a84e0c7c49131250ffd4", + "6848a84e0c7c49131250ffd4", + "v11.2.0-latest" ] } diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index ca80800cd..5a708e6d5 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ 
b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -25,6 +25,7 @@ charset-normalizer==3.4.3 click==8.2.1 cryptography==45.0.6 datarobot==3.8.2 +datarobot-drum==1.16.23 datarobot-mlops==11.1.0 datarobot-mlops-connected-client==11.1.0 datarobot-storage==2.2.0 From e75d3e4f5e7afa62e93cce0e7f805b46a1089528 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 20 Aug 2025 23:49:08 -0400 Subject: [PATCH 05/36] working --- public_dropin_nim_environments/nim_sidecar/app.py | 5 ++++- public_dropin_nim_environments/nim_sidecar/requirements.in | 3 ++- public_dropin_nim_environments/nim_sidecar/requirements.txt | 1 - public_dropin_nim_environments/nim_sidecar/start_server.sh | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/public_dropin_nim_environments/nim_sidecar/app.py b/public_dropin_nim_environments/nim_sidecar/app.py index 1ebe96cd4..09d0a35ef 100644 --- a/public_dropin_nim_environments/nim_sidecar/app.py +++ b/public_dropin_nim_environments/nim_sidecar/app.py @@ -1,6 +1,9 @@ # Import DRUM's WSGI application -import sys +from gevent import monkey +monkey.patch_all() + +import sys from datarobot_drum.drum.main import main from datarobot_drum.drum.server import create_flask_app diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.in b/public_dropin_nim_environments/nim_sidecar/requirements.in index ade32e2e3..309a23033 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.in +++ b/public_dropin_nim_environments/nim_sidecar/requirements.in @@ -3,5 +3,6 @@ datarobot-mlops datarobot-mlops-connected-client datarobot-drum openai>=1.17.0 -gunicorn +gunicorn>=23.0.0 +gevent>=25.5.1 opentelemetry-instrumentation-openai diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index 5a708e6d5..ca80800cd 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ 
b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -25,7 +25,6 @@ charset-normalizer==3.4.3 click==8.2.1 cryptography==45.0.6 datarobot==3.8.2 -datarobot-drum==1.16.23 datarobot-mlops==11.1.0 datarobot-mlops-connected-client==11.1.0 datarobot-storage==2.2.0 diff --git a/public_dropin_nim_environments/nim_sidecar/start_server.sh b/public_dropin_nim_environments/nim_sidecar/start_server.sh index fae52f21d..8f92362db 100755 --- a/public_dropin_nim_environments/nim_sidecar/start_server.sh +++ b/public_dropin_nim_environments/nim_sidecar/start_server.sh @@ -17,5 +17,5 @@ echo echo "Starting DRUM server..." echo -exec gunicorn app:app -k gthread --workers=8 --bind=0.0.0.0:8080 --backlog=512 --threads 4 --timeout 120 --max-requests 1000 --max-requests-jitter 100 --log-level=info --access-logfile - --access-logformat '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'& #works +exec gunicorn app:app --worker-class gevent --workers=8 --bind=0.0.0.0:8080 --backlog=512 --timeout 120 --max-requests 700 --max-requests-jitter 400 --log-level=info --access-logfile - --access-logformat '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'& #works wait \ No newline at end of file From 52541cd0617261d69598635f9308061b4ceac415 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Thu, 21 Aug 2025 16:43:59 -0400 Subject: [PATCH 06/36] working --- .../datarobot_drum/drum/description.py | 2 +- .../nim_sidecar/app.py | 3 --- .../nim_sidecar/requirements.txt | 25 +++++++++++++------ 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/description.py b/custom_model_runner/datarobot_drum/drum/description.py index d0d3b6800..83a754e6c 100644 --- a/custom_model_runner/datarobot_drum/drum/description.py +++ b/custom_model_runner/datarobot_drum/drum/description.py @@ -4,6 +4,6 @@ This is proprietary source code of DataRobot, Inc. and its affiliates. Released under the terms of DataRobot Tool and Utility Agreement. 
""" -version = "1.16.21" +version = "1.16.24" __version__ = version project_name = "datarobot-drum" diff --git a/public_dropin_nim_environments/nim_sidecar/app.py b/public_dropin_nim_environments/nim_sidecar/app.py index 09d0a35ef..3de0b2ce2 100644 --- a/public_dropin_nim_environments/nim_sidecar/app.py +++ b/public_dropin_nim_environments/nim_sidecar/app.py @@ -1,8 +1,5 @@ # Import DRUM's WSGI application -from gevent import monkey -monkey.patch_all() - import sys from datarobot_drum.drum.main import main from datarobot_drum.drum.server import create_flask_app diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index ca80800cd..0735245bc 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -18,6 +18,8 @@ azure-storage-blob==12.19.0 blinker==1.9.0 boto3==1.40.7 botocore==1.40.7 +boto3==1.40.13 +botocore==1.40.13 cachetools==5.5.2 certifi==2025.8.3 cffi==1.17.1 @@ -32,8 +34,9 @@ distro==1.9.0 docker==7.1.0 exceptiongroup==1.3.0 filechunkio==1.8 -flask==3.1.1 +flask==3.1.2 frozenlist==1.7.0 +gevent==25.5.1 google-api-core==2.25.1 google-auth==2.40.3 google-cloud-core==2.4.3 @@ -41,6 +44,7 @@ google-cloud-storage==2.19.0 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 +greenlet==3.2.4 gunicorn==23.0.0 h11==0.16.0 httpcore==1.0.9 @@ -60,27 +64,27 @@ msal-extensions==1.3.1 multidict==6.6.4 mypy-extensions==1.1.0 numpy==2.2.6 -openai==1.99.9 +openai==1.100.2 opentelemetry-api==1.36.0 opentelemetry-exporter-otlp-proto-common==1.36.0 opentelemetry-exporter-otlp-proto-http==1.36.0 opentelemetry-instrumentation==0.57b0 opentelemetry-instrumentation-aiohttp-client==0.57b0 -opentelemetry-instrumentation-openai==0.44.2 +opentelemetry-instrumentation-openai==0.45.6 opentelemetry-instrumentation-requests==0.57b0 opentelemetry-proto==1.36.0 opentelemetry-sdk==1.36.0 
opentelemetry-semantic-conventions==0.57b0 -opentelemetry-semantic-conventions-ai==0.4.11 +opentelemetry-semantic-conventions-ai==0.4.12 opentelemetry-util-http==0.57b0 -orjson==3.11.1 +orjson==3.11.2 packaging==25.0 pandas==2.3.1 pillow==11.3.0 progress==1.6.1 propcache==0.3.2 proto-plus==1.26.1 -protobuf==6.31.1 +protobuf==6.32.0 psutil==7.0.0 py4j==0.10.9.9 pyasn1==0.6.1 @@ -92,7 +96,7 @@ pyjwt[crypto]==2.10.1 python-dateutil==2.9.0.post0 pytz==2025.2 pyyaml==6.0.2 -requests==2.32.4 +requests==2.32.5 requests-toolbelt==1.0.0 rsa==4.9.1 ruamel-yaml==0.17.4 @@ -111,6 +115,11 @@ typing-inspection==0.4.1 tzdata==2025.2 urllib3==2.5.0 werkzeug==3.1.3 -wrapt==1.17.2 +wrapt==1.17.3 yarl==1.20.1 zipp==3.23.0 +zope-event==5.1.1 +zope-interface==7.2 + +# The following packages are considered to be unsafe in a requirements file: +# setuptools From 039611f9f135630c97ae517c09e66f4951b71984 Mon Sep 17 00:00:00 2001 From: svc-harness-git2 Date: Thu, 21 Aug 2025 20:54:41 +0000 Subject: [PATCH 07/36] Reconcile dependencies, updated IDs, tags --- public_dropin_nim_environments/nim_sidecar/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index 0735245bc..bfdbd4786 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -16,8 +16,6 @@ azure-core==1.35.0 azure-identity==1.24.0 azure-storage-blob==12.19.0 blinker==1.9.0 -boto3==1.40.7 -botocore==1.40.7 boto3==1.40.13 botocore==1.40.13 cachetools==5.5.2 @@ -27,6 +25,7 @@ charset-normalizer==3.4.3 click==8.2.1 cryptography==45.0.6 datarobot==3.8.2 +datarobot-drum==1.16.23 datarobot-mlops==11.1.0 datarobot-mlops-connected-client==11.1.0 datarobot-storage==2.2.0 From 7e45b6232fdf5575027de64116b20ae2919ce280 Mon Sep 17 00:00:00 2001 From: Matt Nitzken Date: Tue, 12 Aug 2025 16:38:22 -0400 Subject: 
[PATCH 08/36] [BUZZOK-27100] Fix Drum Inline Runner and streamline DRUM options generation to help prevent logic diverging (#1615) * Fix Drum Inline Runner and streamline DRUM options generation * Cleanup * Refactor setup to new file * Fix mocks * Add unit tests * Change tracer --- .../datarobot_drum/drum/common.py | 12 -- .../datarobot_drum/drum/main.py | 30 +--- .../drum/root_predictors/drum_inline_utils.py | 16 ++- .../datarobot_drum/drum/utils/setup.py | 68 +++++++++ tests/unit/datarobot_drum/drum/test_main.py | 8 +- .../datarobot_drum/drum/utils/test_setup.py | 133 ++++++++++++++++++ 6 files changed, 218 insertions(+), 49 deletions(-) create mode 100644 custom_model_runner/datarobot_drum/drum/utils/setup.py create mode 100644 tests/unit/datarobot_drum/drum/utils/test_setup.py diff --git a/custom_model_runner/datarobot_drum/drum/common.py b/custom_model_runner/datarobot_drum/drum/common.py index 24a2a040a..de2667b1d 100644 --- a/custom_model_runner/datarobot_drum/drum/common.py +++ b/custom_model_runner/datarobot_drum/drum/common.py @@ -22,8 +22,6 @@ PayloadFormat, ) from datarobot_drum.drum.exceptions import DrumCommonException -from datarobot_drum.drum.lazy_loading.lazy_loading_handler import LazyLoadingHandler -from datarobot_drum.runtime_parameters.runtime_parameters import RuntimeParametersLoader from opentelemetry import trace, context from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import Resource @@ -32,7 +30,6 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator - ctx_request_id = ContextVar("request_id") @@ -232,12 +229,3 @@ def extract_chat_response_attributes(response): # last completion wins attributes["gen_ai.completion"] = m.get("content") return attributes - - -def setup_required_environment_variables(options): - if "runtime_params_file" in options and options.runtime_params_file: 
- loader = RuntimeParametersLoader(options.runtime_params_file, options.code_dir) - loader.setup_environment_variables() - - if "lazy_loading_file" in options and options.lazy_loading_file: - LazyLoadingHandler.setup_environment_variables_from_values_file(options.lazy_loading_file) diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index e93c627a0..bad50276c 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -43,18 +43,16 @@ import signal import sys -from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry from datarobot_drum.drum.common import ( config_logging, setup_tracer, - setup_required_environment_variables, ) +from datarobot_drum.drum.utils.setup import setup_options from datarobot_drum.drum.enum import RunMode from datarobot_drum.drum.enum import ExitCodes from datarobot_drum.drum.exceptions import DrumSchemaValidationException from datarobot_drum.drum.runtime import DrumRuntime from datarobot_drum.runtime_parameters.runtime_parameters import ( - RuntimeParametersLoader, RuntimeParameters, ) @@ -78,35 +76,13 @@ def signal_handler(sig, frame): os._exit(130) - arg_parser = CMRunnerArgsRegistry.get_arg_parser() - - try: - import argcomplete - except ImportError: - print( - "WARNING: autocompletion of arguments is not supported " - "as 'argcomplete' package is not found", - file=sys.stderr, - ) - else: - # argcomplete call should be as close to the beginning as possible - argcomplete.autocomplete(arg_parser) - - CMRunnerArgsRegistry.extend_sys_argv_with_env_vars() - - options = arg_parser.parse_args() - CMRunnerArgsRegistry.verify_options(options) - try: - setup_required_environment_variables(options) + options = setup_options() + runtime.options = options except Exception as exc: print(str(exc)) exit(255) - if RuntimeParameters.has("CUSTOM_MODEL_WORKERS"): - options.max_workers = RuntimeParameters.get("CUSTOM_MODEL_WORKERS") - 
runtime.options = options - runtime.trace_provider = setup_tracer(RuntimeParameters, options) signal.signal(signal.SIGINT, signal_handler) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py index 74704ed47..66392202f 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py @@ -26,7 +26,8 @@ from typing import Generator, List from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry -from datarobot_drum.drum.common import setup_required_environment_variables, setup_tracer +from datarobot_drum.drum.common import setup_tracer +from datarobot_drum.drum.utils.setup import setup_options from datarobot_drum.drum.drum import CMRunner from datarobot_drum.drum.language_predictors.base_language_predictor import BaseLanguagePredictor from datarobot_drum.drum.runtime import DrumRuntime @@ -68,12 +69,15 @@ def drum_inline_predictor( target_type, *cmd_args, ] - options = arg_parser.parse_args(args) - CMRunnerArgsRegistry.verify_options(options) - setup_required_environment_variables(options) - runtime.options = options - setup_tracer(RuntimeParameters, options) + try: + options = setup_options(args) + runtime.options = options + except Exception as exc: + print(str(exc)) + exit(255) + + runtime.trace_provider = setup_tracer(RuntimeParameters, options) runtime.cm_runner = CMRunner(runtime) params = runtime.cm_runner.get_predictor_params() predictor = GenericPredictorComponent(params) diff --git a/custom_model_runner/datarobot_drum/drum/utils/setup.py b/custom_model_runner/datarobot_drum/drum/utils/setup.py new file mode 100644 index 000000000..6bd80ba88 --- /dev/null +++ b/custom_model_runner/datarobot_drum/drum/utils/setup.py @@ -0,0 +1,68 @@ +""" +Copyright 2025 DataRobot, Inc. and its affiliates. +All rights reserved. 
+This is proprietary source code of DataRobot, Inc. and its affiliates. +Released under the terms of DataRobot Tool and Utility Agreement. +""" + +import sys + +from datarobot_drum import RuntimeParameters +from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry +from datarobot_drum.drum.lazy_loading.lazy_loading_handler import LazyLoadingHandler +from datarobot_drum.runtime_parameters.runtime_parameters import RuntimeParametersLoader + + +def setup_options(args=None): + """ + Setup options for the Drum runtime. + This function is used to set up the command line arguments and options + for the Drum runtime, including environment variables and maximum workers. + + Parameters + ---------- + args : list, optional + List of command line arguments to parse. If None, uses sys.argv[1:]. + Defaults to None, which means it will use sys.argv[1:]. + + Returns + ------- + options : argparse.Namespace + Parsed command line options as an argparse.Namespace object. + """ + arg_parser = CMRunnerArgsRegistry.get_arg_parser() + + try: + import argcomplete + except ImportError: + print( + "WARNING: autocompletion of arguments is not supported " + "as 'argcomplete' package is not found", + file=sys.stderr, + ) + else: + # argcomplete call should be as close to the beginning as possible + argcomplete.autocomplete(arg_parser) + + CMRunnerArgsRegistry.extend_sys_argv_with_env_vars() + + options = arg_parser.parse_args(args) + + """Set max workers from runtime parameters if available.""" + if RuntimeParameters.has("CUSTOM_MODEL_WORKERS"): + options.max_workers = RuntimeParameters.get("CUSTOM_MODEL_WORKERS") + elif "max_workers" not in options or options.max_workers is None: + options.max_workers = 1 # Default to 1 worker if not specified + else: + options.max_workers = int(options.max_workers) + + CMRunnerArgsRegistry.verify_options(options) + + if "runtime_params_file" in options and options.runtime_params_file: + loader = RuntimeParametersLoader(options.runtime_params_file, 
options.code_dir) + loader.setup_environment_variables() + + if "lazy_loading_file" in options and options.lazy_loading_file: + LazyLoadingHandler.setup_environment_variables_from_values_file(options.lazy_loading_file) + + return options diff --git a/tests/unit/datarobot_drum/drum/test_main.py b/tests/unit/datarobot_drum/drum/test_main.py index b84320681..2d5ecc6a3 100644 --- a/tests/unit/datarobot_drum/drum/test_main.py +++ b/tests/unit/datarobot_drum/drum/test_main.py @@ -13,12 +13,12 @@ @pytest.mark.parametrize("workers_param, expected_workers", [(None, 0), (1, 1), (10, 10)]) -@patch("datarobot_drum.drum.main.RuntimeParameters", autospec=True) -@patch("datarobot_drum.drum.main.RuntimeParametersLoader", autospec=True) @patch("datarobot_drum.drum.drum.CMRunner", autospec=True) -@patch("datarobot_drum.drum.main.CMRunnerArgsRegistry", autospec=True) +@patch("datarobot_drum.drum.utils.setup.RuntimeParameters", autospec=True) +@patch("datarobot_drum.drum.utils.setup.RuntimeParametersLoader", autospec=True) +@patch("datarobot_drum.drum.utils.setup.CMRunnerArgsRegistry", autospec=True) def test_custom_model_workers( - args_registry, cm_runner, runtime_params_loader, runtime_params, workers_param, expected_workers + args_registry, runtime_params_loader, runtime_params, cm_runner, workers_param, expected_workers ): options = argparse.Namespace() options.max_workers = 0 diff --git a/tests/unit/datarobot_drum/drum/utils/test_setup.py b/tests/unit/datarobot_drum/drum/utils/test_setup.py new file mode 100644 index 000000000..cb37cb9de --- /dev/null +++ b/tests/unit/datarobot_drum/drum/utils/test_setup.py @@ -0,0 +1,133 @@ +# +# Copyright 2025 DataRobot, Inc. and its affiliates. +# +# All rights reserved. +# This is proprietary source code of DataRobot, Inc. and its affiliates. +# Released under the terms of DataRobot Tool and Utility Agreement. 
+# +from argparse import Namespace +from unittest import mock + +# Import the function under test +from datarobot_drum.drum.utils.setup import setup_options + + +def test_setup_options_default(monkeypatch): + # Mock CMRunnerArgsRegistry and dependencies + mock_parser = mock.Mock() + mock_options = Namespace(max_workers=None, runtime_params_file=None, lazy_loading_file=None) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ): + opts = setup_options([]) + assert hasattr(opts, "max_workers") + assert opts.max_workers == 1 # Default to 1 + + +def test_setup_options_with_max_workers(monkeypatch): + mock_parser = mock.Mock() + mock_options = Namespace(max_workers="3", runtime_params_file=None, lazy_loading_file=None) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ): + opts = setup_options([]) + assert opts.max_workers == 3 or opts.max_workers == "3" or int(opts.max_workers) == 3 + + +def test_setup_options_runtime_parameters(monkeypatch): + mock_parser = mock.Mock() + mock_options = Namespace(max_workers=None, runtime_params_file=None, lazy_loading_file=None) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), 
mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ), mock.patch( + "datarobot_drum.RuntimeParameters.has", return_value=True + ), mock.patch( + "datarobot_drum.RuntimeParameters.get", return_value=5 + ): + opts = setup_options([]) + assert opts.max_workers == 5 + + +def test_setup_options_runtime_params_file(monkeypatch): + mock_parser = mock.Mock() + mock_options = Namespace( + max_workers=None, runtime_params_file="params.yaml", code_dir=".", lazy_loading_file=None + ) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ), mock.patch( + "datarobot_drum.drum.utils.setup.RuntimeParametersLoader" + ) as mock_loader: + instance = mock_loader.return_value + instance.setup_environment_variables = mock.Mock() + opts = setup_options([]) + instance.setup_environment_variables.assert_called_once() + + +def test_setup_options_lazy_loading_file(monkeypatch): + mock_parser = mock.Mock() + mock_options = Namespace( + max_workers=None, runtime_params_file=None, lazy_loading_file="lazy.yaml" + ) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ), mock.patch( + "datarobot_drum.drum.lazy_loading.lazy_loading_handler.LazyLoadingHandler.setup_environment_variables_from_values_file" + ) as mock_lazy: + opts = setup_options([]) + mock_lazy.assert_called_once_with("lazy.yaml") + + +def 
test_setup_options_argcomplete_missing(monkeypatch, capsys): + mock_parser = mock.Mock() + mock_options = Namespace(max_workers=None, runtime_params_file=None, lazy_loading_file=None) + mock_parser.parse_args.return_value = mock_options + + with mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.get_arg_parser", + return_value=mock_parser, + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.extend_sys_argv_with_env_vars" + ), mock.patch( + "datarobot_drum.drum.args_parser.CMRunnerArgsRegistry.verify_options" + ), mock.patch.dict( + "sys.modules", {"argcomplete": None} + ): + opts = setup_options([]) + captured = capsys.readouterr() + assert "autocompletion of arguments is not supported" in captured.err From ae5245cd95ab7072afc7ccb7446221fe44443275 Mon Sep 17 00:00:00 2001 From: svc-harness-git2 <130688563+svc-harness-git2@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:51:04 +0100 Subject: [PATCH 09/36] [-] (Auto) Bump env_info versions (#1622) Co-authored-by: GenAI Git Bot --- public_dropin_environments/java_codegen/env_info.json | 6 +++--- public_dropin_environments/python311/env_info.json | 6 +++--- .../python311_genai_agents/env_info.json | 6 +++--- public_dropin_environments/python3_keras/env_info.json | 6 +++--- public_dropin_environments/python3_onnx/env_info.json | 6 +++--- public_dropin_environments/python3_pytorch/env_info.json | 6 +++--- public_dropin_environments/python3_sklearn/env_info.json | 6 +++--- public_dropin_environments/python3_xgboost/env_info.json | 6 +++--- public_dropin_environments/r_lang/env_info.json | 6 +++--- 9 files changed, 27 insertions(+), 27 deletions(-) diff --git a/public_dropin_environments/java_codegen/env_info.json b/public_dropin_environments/java_codegen/env_info.json index add23d929..cbac232db 100644 --- a/public_dropin_environments/java_codegen/env_info.json +++ b/public_dropin_environments/java_codegen/env_info.json @@ -4,7 +4,7 @@ "description": "This template can be used as an 
environment for DataRobot generated scoring code or models that implement the either the IClassificationPredictor or IRegressionPredictor interface from the datarobot-prediction package and for H2O models exported as POJO or MOJO.", "programmingLanguage": "java", "label": "", - "environmentVersionId": "688c1c850012682da8000374", + "environmentVersionId": "689b9c1b0056193d330033b3", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/java_codegen", "imageRepository": "env-java-codegen", "tags": [ - "v11.2.0-688c1c850012682da8000374", - "688c1c850012682da8000374", + "v11.2.0-689b9c1b0056193d330033b3", + "689b9c1b0056193d330033b3", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python311/env_info.json b/public_dropin_environments/python311/env_info.json index 5171546f8..ebfa292a9 100644 --- a/public_dropin_environments/python311/env_info.json +++ b/public_dropin_environments/python311/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create Python based custom models. 
User is responsible to provide requirements.txt with the model, to install all the required dependencies.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c860072b67f71005108", + "environmentVersionId": "689b9c1c005f8052300021cc", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311", "imageRepository": "env-python", "tags": [ - "v11.2.0-688c1c860072b67f71005108", - "688c1c860072b67f71005108", + "v11.2.0-689b9c1c005f8052300021cc", + "689b9c1c005f8052300021cc", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 90d5d9233..4540c145f 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. 
You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "6894fd315a5c1b1204e81aff", + "environmentVersionId": "689b9c1c00284a09cb0013c9", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311_genai_agents", "imageRepository": "env-python-genai-agents", "tags": [ - "v11.2.0-6894fd315a5c1b1204e81aff", - "6894fd315a5c1b1204e81aff", + "v11.2.0-689b9c1c00284a09cb0013c9", + "689b9c1c00284a09cb0013c9", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_keras/env_info.json b/public_dropin_environments/python3_keras/env_info.json index f4d93148f..4ffa6c85a 100644 --- a/public_dropin_environments/python3_keras/env_info.json +++ b/public_dropin_environments/python3_keras/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only keras custom models. 
This environment contains keras backed by tensorflow and only requires your model artifact as a .h5 file and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c86002072530100056c", + "environmentVersionId": "689b9c1c005ed75096002511", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_keras", "imageRepository": "env-python-keras", "tags": [ - "v11.2.0-688c1c86002072530100056c", - "688c1c86002072530100056c", + "v11.2.0-689b9c1c005ed75096002511", + "689b9c1c005ed75096002511", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_onnx/env_info.json b/public_dropin_environments/python3_onnx/env_info.json index d7ed253c2..f55343d40 100644 --- a/public_dropin_environments/python3_onnx/env_info.json +++ b/public_dropin_environments/python3_onnx/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only ONNX custom models. 
This environment contains ONNX runtime and only requires your model artifact as an .onnx file and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c860026bb4aaf007fc9", + "environmentVersionId": "689b9c1c003a007bac004734", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_onnx", "imageRepository": "env-python-onnx", "tags": [ - "v11.2.0-688c1c860026bb4aaf007fc9", - "688c1c860026bb4aaf007fc9", + "v11.2.0-689b9c1c003a007bac004734", + "689b9c1c003a007bac004734", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_pytorch/env_info.json b/public_dropin_environments/python3_pytorch/env_info.json index 7b15c168b..12f121211 100644 --- a/public_dropin_environments/python3_pytorch/env_info.json +++ b/public_dropin_environments/python3_pytorch/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only PyTorch custom models. 
This environment contains PyTorch and requires only your model artifact as a .pth file, any other code needed to deserialize your model, and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c860071401a3a006f93", + "environmentVersionId": "689b9c1c00358f486400449a", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_pytorch", "imageRepository": "env-python-pytorch", "tags": [ - "v11.2.0-688c1c860071401a3a006f93", - "688c1c860071401a3a006f93", + "v11.2.0-689b9c1c00358f486400449a", + "689b9c1c00358f486400449a", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_sklearn/env_info.json b/public_dropin_environments/python3_sklearn/env_info.json index 08a06cce7..9d3dfe678 100644 --- a/public_dropin_environments/python3_sklearn/env_info.json +++ b/public_dropin_environments/python3_sklearn/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only scikit-learn custom models. 
This environment contains scikit-learn and only requires your model artifact as a .pkl file and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c860052032c27001851", + "environmentVersionId": "689b9c1c00391a04300027af", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_sklearn", "imageRepository": "env-python-sklearn", "tags": [ - "v11.2.0-688c1c860052032c27001851", - "688c1c860052032c27001851", + "v11.2.0-689b9c1c00391a04300027af", + "689b9c1c00391a04300027af", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_xgboost/env_info.json b/public_dropin_environments/python3_xgboost/env_info.json index 3431e045a..91b2468b4 100644 --- a/public_dropin_environments/python3_xgboost/env_info.json +++ b/public_dropin_environments/python3_xgboost/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only xgboost custom models. 
This environment contains xgboost and only requires your model artifact as a .pkl file and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "688c1c86005f9d5205004325", + "environmentVersionId": "689b9c1c003e816f3c004d3d", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_xgboost", "imageRepository": "env-python-xgboost", "tags": [ - "v11.2.0-688c1c86005f9d5205004325", - "688c1c86005f9d5205004325", + "v11.2.0-689b9c1c003e816f3c004d3d", + "689b9c1c003e816f3c004d3d", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/r_lang/env_info.json b/public_dropin_environments/r_lang/env_info.json index 5b1d123a0..4da033388 100644 --- a/public_dropin_environments/r_lang/env_info.json +++ b/public_dropin_environments/r_lang/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only R custom models that use the caret library. 
Your custom model archive need only contain your model artifacts if you use the environment correctly.", "programmingLanguage": "r", "label": "", - "environmentVersionId": "688c1c860078575bb2000c15", + "environmentVersionId": "689b9c1c00395609d2005e42", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/r_lang", "imageRepository": "env-r-lang", "tags": [ - "v11.2.0-688c1c860078575bb2000c15", - "688c1c860078575bb2000c15", + "v11.2.0-689b9c1c00395609d2005e42", + "689b9c1c00395609d2005e42", "v11.2.0-latest" ] } From e9b3163bdf6f190f55ad9d1f6e53acca543cf24d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 10:58:56 +0300 Subject: [PATCH 10/36] Bump keras in /public_dropin_environments/python3_keras (#1624) Bumps [keras](https://github.com/keras-team/keras) from 3.10.0 to 3.11.0. - [Release notes](https://github.com/keras-team/keras/releases) - [Commits](https://github.com/keras-team/keras/compare/v3.10.0...v3.11.0) --- updated-dependencies: - dependency-name: keras dependency-version: 3.11.0 dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- public_dropin_environments/python3_keras/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public_dropin_environments/python3_keras/requirements.txt b/public_dropin_environments/python3_keras/requirements.txt index 72350ad75..840c467ab 100644 --- a/public_dropin_environments/python3_keras/requirements.txt +++ b/public_dropin_environments/python3_keras/requirements.txt @@ -48,7 +48,7 @@ jinja2==3.1.6 jmespath==1.0.1 joblib==1.5.1 julia==0.5.7 -keras==3.10.0 +keras==3.11.0 libclang==18.1.1 markdown==3.8 markdown-it-py==3.0.0 From cfa570e3ab381de37800b11563170680029d6c2e Mon Sep 17 00:00:00 2001 From: Nickolai Novik <92932793+nickolai-dr@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:56:06 -0400 Subject: [PATCH 11/36] Configure OTel metrics by default in drum. (#1620) --- .../datarobot_drum/drum/common.py | 45 +++++++++++-------- .../datarobot_drum/drum/main.py | 11 ++--- .../drum/root_predictors/drum_inline_utils.py | 8 +++- .../datarobot_drum/drum/runtime.py | 2 + tests/unit/datarobot_drum/drum/test_main.py | 5 ++- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/common.py b/custom_model_runner/datarobot_drum/drum/common.py index de2667b1d..d9a8765b0 100644 --- a/custom_model_runner/datarobot_drum/drum/common.py +++ b/custom_model_runner/datarobot_drum/drum/common.py @@ -22,8 +22,11 @@ PayloadFormat, ) from datarobot_drum.drum.exceptions import DrumCommonException -from opentelemetry import trace, context +from opentelemetry import trace, context, metrics +from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from 
opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor @@ -136,7 +139,7 @@ def make_otel_endpoint(datarobot_endpoint): return result -def setup_tracer(runtime_parameters, options): +def setup_otel(runtime_parameters, options): """Setups OTEL tracer. OTEL is configured by OTEL_EXPORTER_OTLP_ENDPOINT and @@ -150,26 +153,31 @@ def setup_tracer(runtime_parameters, options): command argumetns Returns ------- - TracerProvider + (TracerProvider, MetricProvider) """ - log = get_drum_logger("setup_tracer") + log = get_drum_logger("setup_otel") # Can be used to disable OTEL reporting from env var parameters # https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ if runtime_parameters.has("OTEL_SDK_DISABLED") and os.environ.get("OTEL_SDK_DISABLED"): - log.info("Tracing explictly disabled") - return + log.info("OTEL explictly disabled") + return (None, None) - main_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") - trace_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") - if not main_endpoint and not trace_endpoint: - log.info("Tracing is not configured") - return + endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") + if not endpoint: + log.info("OTEL is not configured") + return (None, None) resource = Resource.create() - otlp_exporter = OTLPSpanExporter() - provider = TracerProvider(resource=resource) + # OTEL metrics setup. + metric_exporter = OTLPMetricExporter() + metric_reader = PeriodicExportingMetricReader(metric_exporter) + metric_provider = MeterProvider(metric_readers=[metric_reader], resource=resource) + metrics.set_meter_provider(metric_provider) + # OTEL traces setup. + otlp_exporter = OTLPSpanExporter() + trace_provider = TracerProvider(resource=resource) # In case of NIM flask server is configured to run in multiprocessing # mode that uses fork. 
Since BatchSpanProcessor start background thread # with bunch of locks, OTEL simply deadlocks and does not offlooad any @@ -178,15 +186,14 @@ def setup_tracer(runtime_parameters, options): # case we use SimpleSpanProcessor (mostly NIMs) otherwise BatchSpanProcessor # (most frequent case) if options.max_workers > 1: - provider.add_span_processor(SimpleSpanProcessor(otlp_exporter)) + trace_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter)) else: - provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) + trace_provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) - trace.set_tracer_provider(provider) + trace.set_tracer_provider(trace_provider) - endpoint = main_endpoint or trace_endpoint - log.info(f"Tracing is configured with endpoint: {endpoint}") - return provider + log.info(f"OTEL is configured with endpoint: {endpoint}") + return trace_provider, metric_provider @contextmanager diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index bad50276c..3f6ad281c 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -43,10 +43,7 @@ import signal import sys -from datarobot_drum.drum.common import ( - config_logging, - setup_tracer, -) +from datarobot_drum.drum.common import config_logging, setup_otel from datarobot_drum.drum.utils.setup import setup_options from datarobot_drum.drum.enum import RunMode from datarobot_drum.drum.enum import ExitCodes @@ -73,6 +70,8 @@ def signal_handler(sig, frame): # Let traceer offload accumulated spans before shutdown. 
if runtime.trace_provider is not None: runtime.trace_provider.shutdown() + if runtime.metric_provider is not None: + runtime.metric_provider.shutdown() os._exit(130) @@ -83,7 +82,9 @@ def signal_handler(sig, frame): print(str(exc)) exit(255) - runtime.trace_provider = setup_tracer(RuntimeParameters, options) + trace_provider, metric_provider = setup_otel(RuntimeParameters, options) + runtime.trace_provider = trace_provider + runtime.metric_provider = metric_provider signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py index 66392202f..2c9e9de6c 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py @@ -26,7 +26,7 @@ from typing import Generator, List from datarobot_drum.drum.args_parser import CMRunnerArgsRegistry -from datarobot_drum.drum.common import setup_tracer +from datarobot_drum.drum.common import setup_otel from datarobot_drum.drum.utils.setup import setup_options from datarobot_drum.drum.drum import CMRunner from datarobot_drum.drum.language_predictors.base_language_predictor import BaseLanguagePredictor @@ -77,9 +77,13 @@ def drum_inline_predictor( print(str(exc)) exit(255) - runtime.trace_provider = setup_tracer(RuntimeParameters, options) + trace_provider, metric_provider = setup_otel(RuntimeParameters, options) runtime.cm_runner = CMRunner(runtime) params = runtime.cm_runner.get_predictor_params() predictor = GenericPredictorComponent(params) yield predictor.predictor + if trace_provider is not None: + trace_provider.shutdown() + if metric_provider is not None: + metric_provider.shutdown() diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index d0814cd3e..4e713a337 
100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -30,7 +30,9 @@ def __init__(self, app): self.initialization_succeeded = False self.options = None self.cm_runner = None + # OTEL services self.trace_provider = None + self.metric_provider = None self.app = app def __enter__(self): diff --git a/tests/unit/datarobot_drum/drum/test_main.py b/tests/unit/datarobot_drum/drum/test_main.py index 2d5ecc6a3..5c321a20f 100644 --- a/tests/unit/datarobot_drum/drum/test_main.py +++ b/tests/unit/datarobot_drum/drum/test_main.py @@ -34,7 +34,10 @@ def test_custom_model_workers( else: runtime_params.has.return_value = False - main() + with patch("datarobot_drum.drum.main.setup_otel") as setup_otel_mock: + setup_otel_mock.return_value = (None, None) + main() + runtime_params.has.assert_any_call("CUSTOM_MODEL_WORKERS") if workers_param: runtime_params.get.assert_any_call("CUSTOM_MODEL_WORKERS") From eadfebe891ee5728d6475de286da2929f4e89be6 Mon Sep 17 00:00:00 2001 From: Aaron Ball Date: Wed, 13 Aug 2025 10:39:27 -0600 Subject: [PATCH 12/36] [RAPTOR-14453] Regen requirements.txt to fix CVE-2025-8747 (#1623) * [RAPTOR-14453] Regen requirements.txt to fix CVE-2025-8747 This regenerates the requirements.txt file from requirements.in, to pull in the latest keras 3.11.2, upgrading from 3.10.0. Upgrading past 3.11.0 fixes CVE-2025-8747. 
* Reconcile dependencies, updated IDs, tags --------- Co-authored-by: svc-harness-git2 --- .../python3_keras/env_info.json | 6 +- .../python3_keras/requirements.txt | 85 +++++++++---------- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/public_dropin_environments/python3_keras/env_info.json b/public_dropin_environments/python3_keras/env_info.json index 4ffa6c85a..f08ae463b 100644 --- a/public_dropin_environments/python3_keras/env_info.json +++ b/public_dropin_environments/python3_keras/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only keras custom models. This environment contains keras backed by tensorflow and only requires your model artifact as a .h5 file and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689b9c1c005ed75096002511", + "environmentVersionId": "689cb69b9c627511efb5b741", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_keras", "imageRepository": "env-python-keras", "tags": [ - "v11.2.0-689b9c1c005ed75096002511", - "689b9c1c005ed75096002511", + "v11.2.0-689cb69b9c627511efb5b741", + "689cb69b9c627511efb5b741", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_keras/requirements.txt b/public_dropin_environments/python3_keras/requirements.txt index 840c467ab..f8a04dc25 100644 --- a/public_dropin_environments/python3_keras/requirements.txt +++ b/public_dropin_environments/python3_keras/requirements.txt @@ -4,80 +4,79 @@ # # pip-compile --index-url=https://pypi.org/simple --no-annotate --no-emit-index-url --no-emit-trusted-host --output-file=requirements.txt requirements.in # -absl-py==2.3.0 +absl-py==2.3.1 annotated-types==0.7.0 argcomplete==3.6.2 astunparse==1.6.3 -azure-core==1.34.0 -azure-identity==1.23.0 +azure-core==1.35.0 
+azure-identity==1.24.0 azure-storage-blob==12.19.0 blinker==1.9.0 -boto3==1.38.23 -botocore==1.38.23 +boto3==1.40.8 +botocore==1.40.8 cachetools==5.5.2 -certifi==2025.4.26 +certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cryptography==45.0.3 -datarobot==3.7.1 -datarobot-drum==1.16.19 +cryptography==45.0.6 +datarobot==3.8.2 +datarobot-drum==1.16.20 datarobot-mlops==11.1.0 datarobot-storage==2.2.0 -deprecated==1.2.18 docker==7.1.0 filechunkio==1.8 flask==3.1.1 flatbuffers==25.2.10 gast==0.6.0 -google-api-core==2.25.0rc1 -google-auth==2.40.2 +google-api-core==2.25.1 +google-auth==2.40.3 google-cloud-core==2.4.3 google-cloud-storage==2.19.0 google-crc32c==1.7.1 google-pasta==0.2.0 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.71.0 -h5py==3.13.0 +grpcio==1.74.0 +h5py==3.14.0 idna==3.10 -importlib-metadata==8.6.1 +importlib-metadata==8.7.0 isodate==0.7.2 itsdangerous==2.2.0 jinja2==3.1.6 jmespath==1.0.1 joblib==1.5.1 julia==0.5.7 -keras==3.11.0 +keras==3.11.2 libclang==18.1.1 -markdown==3.8 -markdown-it-py==3.0.0 +markdown==3.8.2 +markdown-it-py==4.0.0 markupsafe==3.0.2 mdurl==0.1.2 memory-profiler==0.61.0 -ml-dtypes==0.5.1 -msal==1.32.3 +ml-dtypes==0.5.3 +msal==1.33.0 msal-extensions==1.3.1 mypy-extensions==1.1.0 namex==0.1.0 numpy==2.1.3 -opentelemetry-api==1.33.1 -opentelemetry-exporter-otlp-proto-common==1.33.1 -opentelemetry-exporter-otlp-proto-http==1.33.1 -opentelemetry-instrumentation==0.54b1 -opentelemetry-instrumentation-aiohttp-client==0.54b1 -opentelemetry-instrumentation-requests==0.54b1 -opentelemetry-proto==1.33.1 -opentelemetry-sdk==1.33.1 -opentelemetry-semantic-conventions==0.54b1 -opentelemetry-util-http==0.54b1 +opentelemetry-api==1.36.0 +opentelemetry-exporter-otlp-proto-common==1.36.0 +opentelemetry-exporter-otlp-proto-http==1.36.0 +opentelemetry-instrumentation==0.57b0 +opentelemetry-instrumentation-aiohttp-client==0.57b0 +opentelemetry-instrumentation-requests==0.57b0 
+opentelemetry-proto==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 +opentelemetry-util-http==0.57b0 opt-einsum==3.4.0 -optree==0.15.0 -orjson==3.10.18 +optree==0.17.0 +orjson==3.11.2 packaging==25.0 -pandas==2.2.3 +pandas==2.3.1 pillow==11.3.0 -progress==1.6 +progress==1.6.1 proto-plus==1.26.1 protobuf==5.29.5 psutil==7.0.0 @@ -85,21 +84,21 @@ py4j==0.10.9.9 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 -pydantic==2.11.5 +pydantic==2.11.7 pydantic-core==2.33.2 -pygments==2.19.1 +pygments==2.19.2 pyjwt[crypto]==2.10.1 python-dateutil==2.9.0.post0 pytz==2025.2 pyyaml==6.0.2 requests==2.32.4 requests-toolbelt==1.0.0 -rich==14.0.0 +rich==14.1.0 rsa==4.9.1 ruamel-yaml==0.17.4 -s3transfer==0.13.0 +s3transfer==0.13.1 scikeras==0.13.0 -scikit-learn==1.6.1 +scikit-learn==1.7.1 scipy==1.15.3 six==1.17.0 strenum==0.4.15 @@ -112,14 +111,14 @@ termcolor==3.1.0 texttable==1.7.0 threadpoolctl==3.6.0 trafaret==2.1.1 -typing-extensions==4.13.2 +typing-extensions==4.14.1 typing-inspection==0.4.1 tzdata==2025.2 urllib3==2.5.0 werkzeug==3.1.3 wheel==0.45.1 -wrapt==1.17.2 -zipp==3.22.0 +wrapt==1.17.3 +zipp==3.23.0 # The following packages are considered to be unsafe in a requirements file: # setuptools From db6ce4cf25e18180d069b547020494e132296142 Mon Sep 17 00:00:00 2001 From: Matt Nitzken Date: Wed, 13 Aug 2025 14:06:13 -0400 Subject: [PATCH 13/36] [BUZZOK-27241] Update DRUM version to 1.16.22 to add support for `def chat` kwargs (#1621) * Update DRUM version * Update CHANGELOG.md --- custom_model_runner/CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/custom_model_runner/CHANGELOG.md b/custom_model_runner/CHANGELOG.md index 4132fbd98..e3e1772b1 100644 --- a/custom_model_runner/CHANGELOG.md +++ b/custom_model_runner/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +#### [1.16.22] - 2025-08-12 +##### Changed +- Add support for kwargs and headers to generative ai chat models +- Fix support for drum inline execution + #### [1.16.21] - 2025-07-16 ##### Removed - Removed PMML support From f8adbb2a0d39692ad804517342d19abf3b19d51c Mon Sep 17 00:00:00 2001 From: Matt Nitzken Date: Thu, 14 Aug 2025 14:17:54 -0400 Subject: [PATCH 14/36] [BUZZOK-27241] [BUZZOK-27421] Bump requirements in GenAI Agents environment (#1627) * Bump requirements in GenAI Agents environment * Update * Reconcile dependencies, updated IDs, tags --------- Co-authored-by: svc-harness-git2 --- .../python311_genai_agents/env_info.json | 6 +-- .../python311_genai_agents/requirements.in | 36 +++++++------ .../python311_genai_agents/requirements.txt | 51 +++++++------------ 3 files changed, 42 insertions(+), 51 deletions(-) diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 4540c145f..7acc25582 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. 
You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689b9c1c00284a09cb0013c9", + "environmentVersionId": "689e166376dbcf1206bb5ce4", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311_genai_agents", "imageRepository": "env-python-genai-agents", "tags": [ - "v11.2.0-689b9c1c00284a09cb0013c9", - "689b9c1c00284a09cb0013c9", + "v11.2.0-689e166376dbcf1206bb5ce4", + "689e166376dbcf1206bb5ce4", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python311_genai_agents/requirements.in b/public_dropin_environments/python311_genai_agents/requirements.in index 56951a1df..6b840b484 100644 --- a/public_dropin_environments/python311_genai_agents/requirements.in +++ b/public_dropin_environments/python311_genai_agents/requirements.in @@ -23,36 +23,40 @@ urllib3>=2.5.0 click~=8.1.8 crewai>=0.140.0 crewai-tools>=0.48.0 -datarobot-drum>=1.16.19 -datarobot-moderations>=11.1.23 +datarobot-drum>=1.16.22 +datarobot-moderations>=11.2.2 datarobot-mlops>=11.1.0 -datarobot>=3.7.0 +datarobot[auth]>=3.8.2 dotenv~=0.9.9 langchain-community~=0.3.23 +langchain-mcp-adapters~=0.1.9 langchain~=0.3.23 langgraph~=0.4.10 langgraph-prebuilt~=0.2.3 legacy-cgi~=2.6.3 litellm>=1.72.1 llama-index-core>=0.12.49 -llama-index-llms-langchain~=0.6.1 -llama-index-llms-litellm~=0.4.1 -llama-index-llms-openai~=0.3.38 +llama-index-llms-langchain>=0.6.1 +llama-index-llms-litellm>=0.4.1 +llama-index-llms-openai>=0.3.38 llama-index~=0.12.33 +mcp>=1.11.0 multidict~=6.5.0 onnxruntime~=1.22.0 -openai~=1.76.2 -opentelemetry-api~=1.33.0 -opentelemetry-instrumentation-aiohttp-client~=0.54b0 -opentelemetry-instrumentation-crewai~=0.40.5 -opentelemetry-instrumentation-httpx~=0.54b0 -opentelemetry-instrumentation-langchain~=0.40.5 -opentelemetry-instrumentation-llamaindex~=0.40.5 
-opentelemetry-instrumentation-openai~=0.40.5 -opentelemetry-instrumentation-requests~=0.54b0 -opentelemetry-sdk~=1.33.0 +openai>=1.76.2 +opentelemetry-api>=1.33.0,<2.0.0 +opentelemetry-instrumentation-aiohttp-client>=0.54b0 +opentelemetry-instrumentation-crewai>=0.40.5 +opentelemetry-instrumentation-httpx>=0.54b0 +opentelemetry-instrumentation-langchain>=0.40.5 +opentelemetry-instrumentation-llamaindex>=0.40.5 +opentelemetry-instrumentation-openai>=0.40.5 +opentelemetry-instrumentation-requests>=0.54b0 +opentelemetry-sdk>=1.33.0 python-dotenv~=1.1.0 ragas @ git+https://github.com/explodinggradients/ragas@5d59549ad5ef511f621502c563bc55ac5aeb9188#subdirectory=ragas +# pyarrow==21.0.0 breaks the current ragas version https://github.com/apache/arrow/issues/47155 +pyarrow<21.0.0 requests~=2.32.4 traceloop-sdk~=0.40.2 uvicorn~=0.35.0 diff --git a/public_dropin_environments/python311_genai_agents/requirements.txt b/public_dropin_environments/python311_genai_agents/requirements.txt index 4468725aa..2db3a56ba 100644 --- a/public_dropin_environments/python311_genai_agents/requirements.txt +++ b/public_dropin_environments/python311_genai_agents/requirements.txt @@ -31,11 +31,9 @@ azure-core==1.34.0 azure-identity==1.22.0 azure-storage-blob==12.19.0 backoff==2.2.1 -backports-tarfile==1.2.0 banks==2.1.2 bcrypt==4.3.0 beautifulsoup4==4.13.4 -black==25.1.0 bleach[css]==6.2.0 blinker==1.9.0 boto3==1.37.3 @@ -52,21 +50,20 @@ cohere==5.15.0 colorama==0.4.6 coloredlogs==15.0.1 comm==0.2.2 -coverage==7.8.2 crewai==0.140.0 crewai-tools==0.48.0 cryptography==44.0.3 dataclasses-json==0.6.7 -datarobot==3.7.1 -datarobot-drum==1.16.19 +datarobot[auth]==3.8.2 +datarobot-drum==1.16.22 datarobot-mlops==11.1.0 -datarobot-moderations==11.2.0 +datarobot-moderations==11.2.2 datarobot-predict==1.13.2 datarobot-storage==2.2.0 datasets==3.6.0 debugpy==1.8.14 decorator==5.2.1 -deepeval==2.7.9 +deepeval==3.3.9 defusedxml==0.7.1 deprecated==1.2.18 deprecation==2.1.0 @@ -76,7 +73,6 @@ diskcache==5.6.3 
distro==1.9.0 docker==7.1.0 docstring-parser==0.16 -docutils==0.21.2 dotenv==0.9.9 durationpy==0.9 ecs-logging==2.2.0 @@ -136,11 +132,7 @@ ipython==8.37.0 isodate==0.7.2 isoduration==20.11.0 itsdangerous==2.2.0 -jaraco-classes==3.4.0 -jaraco-context==6.0.1 -jaraco-functools==4.1.0 jedi==0.19.2 -jeepney==0.9.0 jinja2==3.1.6 jiter==0.8.2 jmespath==1.0.1 @@ -161,7 +153,6 @@ jupyter-kernel-gateway==3.0.1 jupyter-server==2.15.0 jupyter-server-terminals==0.5.3 jupyterlab-pygments==0.3.0 -keyring==25.6.0 kubernetes==32.0.1 lancedb==0.22.0 langchain==0.3.25 @@ -169,6 +160,7 @@ langchain-cohere==0.3.5 langchain-community==0.3.23 langchain-core==0.3.59 langchain-experimental==0.3.4 +langchain-mcp-adapters==0.1.9 langchain-nvidia-ai-endpoints==0.3.10 langchain-openai==0.2.14 langchain-text-splitters==0.3.8 @@ -182,24 +174,23 @@ legacy-cgi==2.6.3 litellm==1.72.6 llama-cloud==0.1.21 llama-cloud-services==0.6.15 -llama-index==0.12.35 -llama-index-agent-openai==0.4.7 -llama-index-cli==0.4.1 +llama-index==0.12.51 +llama-index-agent-openai==0.4.12 +llama-index-cli==0.4.4 llama-index-core==0.12.51 llama-index-embeddings-azure-openai==0.3.7 llama-index-embeddings-openai==0.3.1 llama-index-indices-managed-llama-cloud==0.6.11 llama-index-instrumentation==0.3.0 -llama-index-llms-azure-openai==0.3.2 +llama-index-llms-azure-openai==0.3.4 llama-index-llms-bedrock-converse==0.7.1 -llama-index-llms-fireworks==0.3.2 llama-index-llms-langchain==0.6.1 llama-index-llms-litellm==0.4.2 -llama-index-llms-openai==0.3.38 +llama-index-llms-openai==0.4.7 llama-index-llms-vertex==0.5.0 -llama-index-multi-modal-llms-openai==0.4.3 -llama-index-program-openai==0.3.1 -llama-index-question-gen-openai==0.3.0 +llama-index-multi-modal-llms-openai==0.5.3 +llama-index-program-openai==0.3.2 +llama-index-question-gen-openai==0.3.1 llama-index-readers-file==0.4.7 llama-index-readers-llama-parse==0.4.0 llama-index-workflows==1.1.0 @@ -210,13 +201,13 @@ markdown-it-py==3.0.0 markupsafe==3.0.2 marshmallow==3.26.1 
matplotlib-inline==0.1.7 +mcp==1.11.0 mdurl==0.1.2 mem0ai==0.1.98 memory-profiler==0.61.0 mistune==3.1.3 mmh3==5.1.0 monotonic==1.6 -more-itertools==10.7.0 mpmath==1.3.0 msal==1.32.3 msal-extensions==1.3.1 @@ -229,14 +220,13 @@ nbformat==5.10.4 nemoguardrails==0.14.0 nest-asyncio==1.6.0 networkx==3.4.2 -nh3==0.2.21 nltk==3.9.1 nodeenv==1.9.1 numpy==2.2.5 oauthlib==3.2.2 ollama==0.5.1 onnxruntime==1.22.0 -openai==1.76.2 +openai==1.99.9 openpyxl==3.1.5 opentelemetry-api==1.33.0 opentelemetry-exporter-otlp-proto-common==1.33.0 @@ -292,12 +282,10 @@ packaging==24.2 pandas==2.2.3 pandocfilters==1.5.1 parso==0.8.4 -pathspec==0.12.1 pdfminer-six==20250327 pdfplumber==0.11.6 pexpect==4.9.0 pillow==11.3.0 -pkginfo==1.10.0 platformdirs==4.3.8 pluggy==1.6.0 portalocker==2.10.1 @@ -320,6 +308,7 @@ pycparser==2.22 pydantic==2.11.7 pydantic-core==2.33.2 pydantic-settings==2.9.1 +pyfiglet==1.0.3 pygments==2.19.1 pyjwt[crypto]==2.10.1 pypdf==5.4.0 @@ -336,6 +325,7 @@ pytest-xdist==3.7.0 python-dateutil==2.9.0.post0 python-dotenv==1.1.0 python-json-logger==3.3.0 +python-multipart==0.0.20 pytube==15.0.0 pytz==2024.2 pyvis==0.3.2 @@ -343,14 +333,12 @@ pyyaml==6.0.2 pyzmq==26.4.0 qdrant-client==1.14.2 ragas @ git+https://github.com/explodinggradients/ragas@5d59549ad5ef511f621502c563bc55ac5aeb9188#subdirectory=ragas -readme-renderer==44.0 referencing==0.36.2 regex==2024.11.6 requests==2.32.4 requests-oauthlib==2.0.0 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 -rfc3986==2.0.0 rfc3986-validator==0.1.1 rich==13.9.4 rouge-score==0.1.2 @@ -360,7 +348,6 @@ ruamel-yaml==0.17.4 s3transfer==0.11.3 schema==0.7.7 scipy==1.15.3 -secretstorage==3.3.3 send2trash==1.8.3 sentry-sdk==2.29.1 shapely==2.1.1 @@ -370,6 +357,7 @@ six==1.17.0 sniffio==1.3.1 soupsieve==2.7 sqlalchemy[asyncio]==2.0.40 +sse-starlette==3.0.2 stack-data==0.6.3 starlette==0.46.2 strenum==0.4.15 @@ -391,11 +379,10 @@ tqdm==4.67.1 traceloop-sdk==0.40.14 trafaret==2.1.1 traitlets==5.14.3 -twine==5.1.1 typer==0.15.3 
types-python-dateutil==2.9.0.20241206 types-requests==2.32.0.20250328 -typing-extensions==4.13.2 +typing-extensions==4.14.1 typing-inspect==0.9.0 typing-inspection==0.4.0 tzdata==2025.2 From 32420b37ec657c6fd10bccc97d00743cb4a8a23e Mon Sep 17 00:00:00 2001 From: Chris Russell-Walker Date: Sat, 16 Aug 2025 05:00:44 -0400 Subject: [PATCH 15/36] [CFX-3334] Update to latest drgithelper and properly set permissions for credential cache daemon (#1630) * [CFX-3334] Update to latest drgithelper and properly set permissions for credential cache daemon * Reconcile dependencies, updated IDs, tags --------- Co-authored-by: svc-harness-git2 --- .../py311_notebook_dropin.tar.gz | Bin 13682 -> 13470 bytes .../python311_notebook_base/Dockerfile | 2 +- .../python311_notebook_base/env_info.json | 6 +++--- .../python311_notebook_base/start_server.sh | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/public_dropin_notebook_environments/py311_notebook_dropin.tar.gz b/public_dropin_notebook_environments/py311_notebook_dropin.tar.gz index 80d65fcddcc478c76a408cd1f91e3562eaf2ef27..5481eed2f88544ac23ca3f52558b2cfdac20e7a0 100644 GIT binary patch literal 13470 zcmV;PG-1mhiwFRXaGz)Z1MPinciT47=zi9(Kxsc4DHW+#+lji`vx==Ge&X0ZmfhVR z*H@(|$YLXrT9UG(wzo@IBe_FPxbNd****r`2YS_HMJ$*sbru zJ0!MtcAw_|V?4R}-+Y??o9DmhyrVhtMobZ$y8Z&DtW|gDe->j~P*=>oqO1!*)WW^R zg?1SvP^?juYwmuOGZC3Vqh2q{H0Bd`IZM=(4S-i*GsHsPvY^QSib?%R3eQa&H#NcUR7ySv7sch;l7QnmUJMatW z_!1c#OZ+)ty*%Q;z_?<9K9Y(%UR6q!Qcd0^O9V-lH z96ws#%#AVG;$A$IfSf@xp82EKnumU>QepYW!fb0m^R3FHQjRH*;7pXe)NQz`>P(+h zC7-?)%l@K|Q;T`BT-Y}}xN+j?SE=J3{=c&W9~zvy*e56hwo1g`rYkc8y$P%&SL(lR5~0C_u=E-Wz4 zU20sK@gE0ohWow!xBcPUllT2@7M1@~`&4s0*PC*ygzOr5Kae)8!tCv9hLY$6);}0Y zAnDnR*1Ht)+t>GlG_qd2676_k{)66mZ_qyfgoV^E3ZZ0^7 zC3SxOZbJ-H4 zB!>kCppeXMH8~d?4n(^*n zzfphLfK(}_S=)%!A!2~In(82qn3$}Pw*Rj8rau5~As7O)J!biHA{oT$cOa00P*}!y z=Nc#x{whS5E0^`Fid6w{W3GUU)*VjN7*E|8K!CUzE@RjSdLZ;5ie1erZ+#e57dkPS z@Caw6Y{(7HQ%p}u0R$nWgJkLwCssAZA!K?6A~?`2PdH(Ve}p-Xs3Sx|S@$ONVS5@y 
zVH69wIgXcrbDM!g1=-#CV#dYrEkKNw>;NGI{FbRMLzT!7nLP&VD!K*UrCHYl=YlRP zBywskQjZV45Q8eQ5U=LYhz~J#A?6%whniMIx1;D6P6t=&t-%lPj}DLD4cK4UU&<3d z$fCtgRtwH%%qAMDR|t;Ouc`J7MHQN*GisoZ6BrmyrjpCQU462?1B^-OQY=l@wl5b; zxuiWemlE+jXiWAM9%R#iGyUp+syNnn8V-&9H_Yau)+?5)=b@Z1Q-Lv#-x-iL;0b{l zxkfN7AN22uU&_3I@In((O-UHtJYA)uY)&6BYjBuO-`P$xTO}^0+r#MfgTGkN(yiA^ z5^yDo|KMT-`2San{rZ9ZA8qqVhZ7CQ(Q^L9w}JQR|Myz7|7-8=wHwXd2JHWu^~RI_ z?@=DP|AXD%?>h)Zz z;(QJgmQ#|`TI%81K8!lB_4g?BBkS~|Fu;sLHaj@Gc#k67*!L2#x5c?(iLD{By#8_T z)lq-Ae=#^ec~4yAXn1yUe17=8Kg2nIaPj^Wcb%+}5tKwD`bKp(Zsa5Lh&zHkqp*wI z8FR`u_dk+W4I1r*^Eq^{!*G(yMzQVZmdp+LGAY*f->m!jj8lvFKbL+)c#56mWAf!` z?7sP5-)W`if2}&6|21|SPxJpV9^7w7IDKrSoo=_`GJ50?d7A%^@jRXXf5!QLX>0v~ zRr{Oclk*;X)j#fkgB#D{r-V6sQa6FKEv~YKi^u$Uv_&1u=<@>Z@i<1LFi}>iF%Vy{ zyib+^Nhc^7Kx>bshAnnj<`HFKD~u{7KCZG=xMZFiK*zn=axBmhui}KyRsFfLVpEKI znh{r8BGgVzqi}gWWpm$)!f`M`WUkfgj#2R?B@Ep`RPA6-@{$Lk#ZhHXN#-Nfp=bl- z3~#WzD4Ow!A8-R@*?Q;h3h8n=Lp8Tje35LY@uU$b4n<|z-~W_Ky|XtA^~DE=XWbf4 zyqZ93O(-}i8Fr<|=f&wY=thuGF1>H;3I#MCj|kPXsN~|f_rBj%I@MTwJ3Qz%>hM4M z^+x{!bo?MkJYI0mpRDduJp%LNP+}zY&xCe0Xzq>c&rn!Jl#)B z22&hQl$6LCcOc78KmCN?;1B#5)1re~VrJW@t?+StuYm(cE64Sa^c@&wsNVq?wdKfv ziT~JhKk(=7tn<>@p&9^GrYZki@<^$CTF*tTgb{gU^F5x%oE&#sPNQLd@BgE>f1XLj z45im+C+|PIUA+M|4e=Orzdt-j>KO2ZCgb33|LBN?3u1qEh&o|Dfe~l&6d(3CN^^}#G_Mk5zGtqO zMZCkNNwSDLwc0i0TaE}rQ+FKBfXSvWxBdsec5u`Np`auxy|nZhq*Y~>9pb;Le){5B zSJ>7bvJ+J4DLzunnn-;{9BsyLV1tKL-6>%Q#Fpw(=-9TiNV_r+(_!1`4~zXso3lUv zIi{UUUYzQVUiYl_Vi-u-H-ZA!ILkX<<+VgB6)*Yu#uim%c>Yp=z)Rg6$ z&h87R&EN~HoAO)BX**4J9l49?jsFW|X}6(NaVzkE0BeJlCm7A#*seP**cxoe9ee%) z78M-vTO0PSvlvD!j&H~88c&qbU?iT}a+?b4KD+(};4cIG7yrAx2Aag`eg>Wc-LbLA z&*KmFl)GbCIAeC>&()M;fg6OB+Z+QA7LNdr0!k%3a}W)@IDU2U`gQ-T56n@Hn@Bh~ zIX?gU+2MKr)eq-=JoVM-CM(wOpH$()Q#|1n2eHW2RjK?5bXRHWWov};GNT^sY?GFf z0GKMy+7x(5P4oJiBh7VdsnK93pGO^`mx$&iGX#F7RlS+M0NP-9TPfLI(0LH)JCiV+ z#U;suSh^bFWIUAlaZIOthB68C0|k~Ps%lVN0l5w}o|MI1q8=ns0v50jy%g8>LA-)F z4crhL>@6c`#St>0afGZy054!J>E<AV772|Jd<<^`w9~BJpovyv*UW`o_onl3 
z%)b3-R#htPpPc@{9I+ⅇuG`^})La(PuHdbrV?gq8O95e^t!+Q%#k=&#;nZb*~Pr zm&SUppsbl^Gm;$?8K`WA4QVDl{Vo|O3Q&W=+k=esrJ%sP94iAXA)>_7ni!M?vt9j7 zzlcMAM5H`&sNJ|xjXu`g#fRE+Dm?!%_9IaS$smmUU-%gCvZ6*wWgMy6L5@I~02d^f z+`!`T_=R8)&QYGUuaP+)AbD)lx{0-md50h<%M3}~UP1=Um=>@BJ8G$srLhI4P{|?z z!o!C6zYv9{I3fF}1!Zwm*Wo7#tK)n!f+IndYi1-+kV+|;L|7%T2(;Q4%cz)mEcPOQ zk>nbI2!|FpHI%Euz{dXCWS|#|?-^UJ3^m;~_%yy#m?+FXlUV6U)TSV^Oe4o~a)gl^ zWEvfihJd(Gc3%iyorJi-%3)JOv~kkfvLlkaZ)oF#L2R)jGG?5+A8{9j!WX6|vSE>Jx)007;y@02rP-lE^Hl=Wik|s)=ay3Lz z9HnnyEHHV{6d?>i^c>-iQZ1OUTQnt5b59qN00>inv(eI@jmeTz?>uLr=c3I4NDtux zNvE_XqNY-*qTF#bT-)akXT^h%ut`yFvMh$flD`^m_U)xR| z^(agbZxs4|3T}!iOYjaK*ite@v^I@w~Zb6bmZrj+Ot|U+RNr1YlxE5@6 zhf)=#-mN3|6OsWFMuLdU+c5edw!?&bLZx1-3l7v-4H#<;)re~|4tE4(m55mw?mlQQ zp$o9y!NFO7Fz8AzgU*Zki~1Kkj45Q$UkbQWdHLh4|+FQZghQv{!GF#O6sblo0L6QZ+h|$XYMjU)rZbAQE$I`VGwXSR~h1_ zhb#%nJIWgS@nYt#$Xg9vwWOhAil_!G=crquxB$MSmtM@-Ct0#C-p5%Qka=@DO^nL!ICxW z?H5H!Z&lJK_SXmLSpL;${kM5CI^VBU$363Zuhp!h{qIf-7(lB*>wlyEWdHLh&z7{C z*gjeC;ML%u!wh<;-mr$#sJCG9HD1)}?OOdMyms1+PP569pIQIof^D0dX7am1U8OwS z6UVUjtv^N^ntJo>f8tw7S_;Jpw1Hn6=Tzd0WO(i^Wrav5<8E9K_aO@Rs@< zYRd`*M=A!mA8A-O?rcfkR%BqY2;U|sVVu0< ztMmv6{u@r=<1mU{RO#Q9g?r-%WEwYjb{a1!8y*4qIPZiN6nw%^DO+b8-h{$lbu%FE zo-KAHcBiGS!77HG>TB1N2HzLwZ=pls^%0%>A0EFxp@Wai(Ia*cviQN9$o2R+Di@n` zz#KY1CTC%oP$Di^Q-9s9+$S>yB`tud4zh*UHUzcu9K`$ z$KhCxpcC2psi-7H)iFUv!h(jJJ{`nX{ul+ZQ-M4-!OXdYnWt$NffrK=)|Y?KTGIh3g$HO1J7T$GXYGFW44-oVJjpOi_lM2)eaR8*^ub7K{tant@3 zHY8-Vw!-6*3DqOqdbhuR7D zS&TJA0x+To>yy=>A)x|yCcY{93YBXTOO~)HLr#f0Ucedum@q}waTO>*^)Cp?Smqm` zT$x-zyK<`Z=Zj>evQgtRr~*()+Jd+SW02Na+JcO;8Hc=QJYHbvkm5=5jz=RNg|T9Q zr7iWPV4zgeNpyrKRj0z{gqp09&V7uAvX83nUz4Dp4;{Z6a5C zdrDgb39qpARdVi8Ln-VYfe}OVHlRul<9!`QItjvO&~jiASS=A`567*CM{>aVZrzCR zO*G72b6D>t5vQpaaa*^D$6Ru(h-@=GJUf#5ev;SbPKn^2dkX_9)F+RlQKF#P{f|Z? 
zn_&yS90;Y($z+0khBXgALYrv<67MxQ#`*aV@+Ufcz7LbJ@$<-yaR9}a^ZnCzoG*Ga zj3#72K!$1hPhl%yZNSh~oT z#G(furPUom?aK%N08CauuHa&3q1^yldh&=AL#P4pN#IGmDWhfBh1PjDaY=0;*2zOA z6jPTHeZYN44-M*-!@xz*ENn(eVFQr`h`tWo>`-GYAs!6_F;CVK% zztKwDl)3RL@X)#j*Ik$teOq2O+3n1J4qgXO+cXkazN-~8Z z&>}34n>)L3U%D0|7o>bP3`~Hs5epnv$yjU|MgLx zto~O9YN8lPIg1Jl`Kho?MA-@VTgb}CdZ|-ZV$v!_M2*3CSdDDB7$FX;rEDFJPm7xy zT`y^g7arv3yfvocMntB9HgXkHWeZm+G3Iib> z#|r#>ayFVK*iy+G7Znj^SQx16qo0(5a0-L4yz#v-3WSMP$6YL#ZIi$y6+7^g`hW&o z?9Zj&+1SFvq2CM4G-55GLGh6=7e-ObuwGx+G2{(R^U0}o+b&GK!Yrv}kYS9h`m%4;K1I+O_q^#dxW=uTzG2XB(+mb5DP!OIXr*cKRWH74WIvnp%O5BLR=~! zG}Fdk>_Jh4Or*%2M6OS9uHqSMsjlHjbABV*G)D7 zJAlooeHgHFK3~jmBWWDHluBfblc*#!h#m$T8F8^sMY9I63za@JW_3C&v{5=nGnOqj z&}#Z4e-yb9h035R&6?1`am0B*-b#t^IXB>wZh6GunQ3d;?ne=xhm(qX(|+r8TO5I2q)1nQ@zR_NEq00N=?`sqdS%vaKDOY zk{uD^7Gs;>UkssOd%xUFp!2OpW2n$N9K!(mfdlEz^)JlcXEof}){?L$Vjam0X#sN_ zl>3_r5N#Q1%+a-%T7ZO@0@C7!TkLA-%sdjFWcd5yP?3O3p{1q2cRwF?k-F(d7%W9c z&4zs=u=@pT(gU@c=e}*6bK7 zI$yi#HkE{i|8i z)AI-q?EiwBA&&dk^8mVM{cp5(+d2Qgr~6+X>mmOie~d0NySweyix-p0?zr9D-FeZh zx0|C;i?@aIjdI&bdDc~CE!RsX$qBY!4G23uZdWdI738WmYRqsT5Gko=uLWQ*gG%x zKx}!@-Zd-0b1U1`(cRf^>^g$|jNNDU4YU4F`SkAx-QURI;JZR+)ui1xJHQir9j;Aqo>a{*kOp(Z}xtSJ!o@2%utcV-e*|K$6u_ zL!WI^Bc565EiPFce(S0`f9^e4&EH~WK=Q^cbI@AkPgoc~>OXeP$ejNa+&ugxE&#rp z|2KE*X#bTt|7pYX#Qz`TdAk4iv+n=4pWFBYxK4*HlZpKTzm(){M!11i@9`>DuL=5@ zja=`8&>!(&IcGAd5AWh@vZDKjrTN@GC{~SO%U* zS*z`}KLcVas%Szf1XI!MC4jgx5;wq%Plb2WAmNL+3|Xo)Q6${`x4pCT;eP+lYLObl=D&6~6So8CFbOoog`&qVrZ@6F+{PG0EI5UGQcvvZIa zFewhnBuadL)5jQPM;Gsp2bgxvq4IFHoGStIp#Qpeadf_xRx0g_S4hDreTlRCl6nc# z{_3h!k|l?S#|Qm?4Bz#CKo~x7sgGEC%qMIpVv+-T#gu@?iu<{FhxOGLu?I%=IpMil zsxbSnEL)pEDF7iAAk+hV%TYheLgv%_YdDYbUT*zcvKXTc&sEWim?jZNdp`4mZETL! 
zp;o1LX6xuI)X*}o7)iP|(w7)5k+*p(jDSGu<7tdSbQy`DlQIzM{4ofYdg`BtbWbPl zXeP=c5CgXBB06XVwpNO@5;_{dRDx2M_0yYz?LeyE54&8?=$uC1$vsKAszm7d8EnG%Qt7OWfHU2upEp~!u zg|~hTSR&Mk7?o-}M>|E$!nH%C6b$4r(=cJ^1-V>3w8&ZcibwOm^@P!!b@jN zj-(eS9CPCrj;*{_SM*Az@w98%E>(@vox5cbv2Lh>vj5~Y6ot3Ys}ztUBonFa7|ZlD z@3&@N5^BsYk0+NpYeyEpQlo@x@8-TxsuW)en=4lC;S8@G?yBrgyqNh(*{WIyB7=BlWZOe8NreHq&b*irni0ytNns4WhPqH(37`6hK5 zz4K8`W}vBKvPGRqa>i`c**T8tIzKQj?KRQrxC=DDC@X-@3`Wdflr7t;q|76Wa#aTF z*5!nc1M&)4HcLqkWu&+$ae1`rnr*12@VnWXs`9e)A)%^En+VM(QaL##L!qw9(xNFj z(+y8?=ML$-R79@2D%{FIXGmxtt;*}59VO3K99^ia@%Zb+`n8~pDe->-x$2HAO8oys zFf$w~ZKQ)bfJ7jztnnpwP_W(&lW>SUvQQ)gDO7IaCa{mAU?s3ir>s??Tw_gAqYG&z zRfZP(;gGfv!y&vOeGiA^M(~D*x-x4G46$w8FbY}GQLU_0fF(B7=eoD2}0KZWtBc1+p7YQ1doSw=!j*;0$pou|{kia6d&@ zBwfIzkxwUIY3$B;P}W=l&{NL|n3}QiSwZebntMx1nT$dwP(&1pD1$stF3%XBN!ZSy zGf$Mhi)afW-h<##B#>Mapud{hRrPZ5jxXz1R}KLvS283gqDxbXRdLeq>XYo1DU9eo zP|J7uBy?N*wkOUH3c(m?45S1M8R`Ey-% z_~F*Y9SA9+?$|ftDwfhsDnyB}eO**#VESB6G&OzZAdr7KogSxYxxZEhoXH%S+g_F5`FpGWUSsy<2hYtXY_ zQbONAs-fI2^mk$znY+r&WEPh}s>i|*oTQrptWBhx&=3ln#?Z)C%vU&|$j zy^|jU@!iZRC}}OD7%U^-L%1g~06u<&+)?rZBk^w5S;Tcem1GoNy0w}---t%iMMs?u zXT0NtErim8(p_(KqqPgbG6p=UgPkvZB-DLy7;XpE*vP}@tHnyRs>AjPwC4t-NMDXb zRaOS7#meYZNylqeFIa)_n_6d(o=ImdX9`w2I4`kWM=H@T=YH&oxocTg&p5?OtqZk9 z7zUX1=)yXeOu5u$fB!o;Io5SZi-u4~pi!FWSR!wZ=dYp-Fk_T8SM4SkJ9^N+rIUGJ zc;OOV&>hvllA%5u%Cll=yaaoKJE6%P!ca6v6)uYtg|Y7?a^g_{oZ+0qXrUG?HIph^ zP<9c~lp9(yH))1R0V4t9c6L%_kxQfa;qA)$My>*2+g*u^tv}cZ4uPOLOSXXLtp&rp1{b@dyT1c~~aS{+! 
zS+ZQrc90tOlj7vQ6qd0+0Gr8twn^YCeh>Y!H9P=m< zikeJgP=OCiDH5jb#x@>b0eJ0+Jce=P9v$64F_^*9#E$)iwXDp(0BST8arxdYmLf>8 z=nopbz|sY>01QO7=}2kR{&ZQ9;~g&-FiY1bZWVP{lZuliu81hAQW637U0^!PD2+FT zEH>0(s3fOB3dQ4)p}M46XD`by9cf3Hv%aAb&f4r{4COv)t`Co0B{PaA=ld=E*N})jRi~mFS ze`fFhYPFy4|9O;0o?FuOxYExm-Vh=~atjK+P!LCzBI-{J!~LZ@07YKbx&q`^o?NQJ#ql9b5R9z1i%l+pRl|#*Wi2E#hV3&+yjnm(Gi) zsrMiJsQK@SGviyl;05-{U&8HIKX=dnMr*IWn_K^zJME|VKacWkv8)4d`+*L?n>>K; zbpih4KERt=^kuw&?{@>1uBj2%4!-D)Bb`^1>C7J~@6}U`Bd>A^o|;#Zm*`eCjMg8@ zYuu>g=^d&9Q)LxkTZ;%nl&_)-5j!szChg7Na>^WW*D8lyNhU`X^p`HjRu?V^+ zuCMULjje!KyqS!QV!v;Y`K?eOi{OrAQ5t&QGK%rG)W86MHpK~02}z&4^lvdn)$@efU2E38#d0WZiBUHR6tHF>E=J5Ta+dA^xZpr03M6^S z-33(Vz6&e}*hr;A;qF&nw}zJh^Z9ZEikF(5ad)hcLWc`OKk$>`kj@oy8d`khnjjz! z@nVE7X^x$tN>R3u3F^>_G4gG=-dmwXB>=lMONi1Z<$j! zTvQ91hSu&d$1_b;^)ah*X*aIc4sLYjHKbi&q0$eai3_YOIDnPmjj^owwG7gSP3j53~h z=^<}NG_qraU7~Bv4FIr^Qyb-#UHK-g+0LZMu`m#mSEJIgRb^IW72ydVoX;4l3g@#x z7XFIM1wPF9(3BfefiB=#PQl3fG5*%75}VtE^kQX3m7N z#&MXuh74mPN{f0N$1!Xt$uD-3(5ZzOG&RWxb&8va`6(qScui+}Ui_kppjn3)stom9 zTr_UO!u7{MW@Inf@Uqc~$_6Q{s@7WM62xZEkt;94RhbwqwO#LI#ohu@E+Z`&u+zoe z_?Rn|txLoRF+Sua2ads7O&PX>aPYQw**0;Kepq zT`Klzzqk9U&Oy$L4Ov-U!^`4*P2zW`a2^YDKPcy0SS_q)9RUxY38JnDXS`TIH{clL zMl(5&2lgCxqxC9l=41u|M(4(8RdPKLQ_Jl5o$tm*zbs4lY`g=;o7qFgvo#s<>h zm20sv)`X+`8lIJ;Dw|uC?**FJ8#M_UNYZkQHzK5%2 zpdm&yunovt1DIv0clM>gACX2p?L|rjx-EKsZk%*-iW-Q*XKjnUc@fyq*M~0*MTpg< z@sbX4$35Nl0J5vV2TCSqo4BI9m~Sp z{a7}DRB273Ix>JNsm|(PIiIgSLu{(#C7m^yB_kk}e=nEIj7R~O&uR4jz*73K6<@;p z11EIq{6`%5e#ujx^ttE!7tek3_8;xm)A`S%JX^o-_~*Yf{;NF6apP6s`JoKDddo*K z+NvZJI8hvt==i08fPhn&!2KDU1yQ<^J)V1a+U0AYP{QMA=Vg1i7JrxzkcGCdJkguX$ z(%qtLiQG(PXGVTUj5ot$ApaLZzvLY=;%pR!?ii!aX=CxU<(Wkm*WSNDe;rNEL#Qv| zWvg}@Ls6o6TF}xAFmVMHf8PD&hdNp^2tbkr_JqPD3=f#oat`Ca3_Jl6C?nfi3T@i`)$(+e*kg@;ajg6)Uq-}z6+y3Z z=OG)qaPrO!<5F;@4eT2DZVeac8Hf3*E?(H%d71Axg{yr)M<3X^C+~-+=jnNR{_~&z M2TA;lcmU!605^Lj0RR91 literal 13682 zcmV-&HI2$2iwFR(eS&BJ1MPiXciTp?V0O>hv*+?@pY~z*r32~4q(g#V`mmYdoS`Mk z*0n6@h?J8YMazI-Q$$Px8~~KeB)$*(OZF%1`~JDDs{TL&1SL7PHFsu&nOH=lySlo% 
zy1Kf%s@kr8^~IlBt+upMHFR^Q&)+4_oYe|glt z`f3ryP6(*DlhBLeU_RlY9WJ7XPp6d|XZntZA|GTMk8f`zwnP8@J9+G?H($n+z#q7Q zKlZNd`SP>Z0rc3}+FIZLJKJ^a|Hck{-D>QzTBF{q)xKi2&yGpX^H1LY8*DF_FGKHY z60-*O%DxjjXTdOt%k0p1ZRYqR=EV_%)_YUWiFsrfHdtpmWmG6)A&+=?%}1D=Exd<3 zHR68kIaB=JpLh|2|1bRcVmS4p2_La}7|cVDLoX~@6f8oQGdCD<7L2pt06;!A{Rx&) z&G3WR10c>YxaMpW@CZ{FzeYqb-Cr`tjTg?81tIhNnEN9>0w{!%>%?B*W98ApotOyO z1=Rqb1KM%GrhzkJ{5=4OpbQ|jh=UoFcb(~y-AuU8fO~vCWyoT&=U)|E`w;32p?dF{ z_a?w&o%!50K(yI~HG)=;LubYZ;~)fhP{6t@e4+lgtG_Hj0Pt`ia2;F$QE!~(=M)Eb zjQ{#}bBootb{Zi3YrF6pJ~p>^9>@PfJel#|cpU%h$G>a8rZMtT3=tf<-W-OkRkP`T z7GoPwSH#`CtaBjL+_}MtcHzfRte%%^=De395rsj$R?EvY;$vqqjn$C#K~^9dVj^!^ zP~_jlp#Gi@qriQ~<5M0+f$vPccxeOOW226KxZt{yGfuJ!$AI`Xn0hl0u`L!DJWXt( zH)irHn>h11$Sz0+c>xVypkQOMHv_5{ha3bLS3=N3R&hqlQlV7HYt%Hjx&j#~1rOBt z8`WrOAsAULnMUbYVKC*GXt`4-!p|0Wqk#lu_u|pi8y;vG`k_pPPtpLqpmHJ9K zCcuJIQSL(Ha9P%wKB!7Qd?}IrSs%L^@pv(>Tyy{0jwWBEjyvT4_BJHew{~~8oAsUg z);7rhT61UjQT{)~v+;Fx==s&56HQRRxj~8PKZ)6m12PH*ez1t=ix`DwJb^(4@{+|M zZDBmkJdh_W;t^^tjJT-qwtM#Cq}Lt1Ioa>FH-D?QDr09F@ehT+acc_eeuMKIwN~oSeMw$qxdwl#bS2U<)AK0M4d~FqWshP;fI_ulcd^ zYZ`&$lfh~CtasS!caQsn{qAY^c)xqRci8QHKt#SnTKXU``1^SP()9GC+dunp@b>Vm zf8IIj9=`<~&bpm`w@t_;8GqP+IoRv$z3L8LoxJI`)1>^@>W8ZBI_`v91r*mP`+>D# z7G|#w7;2(pnE#+Bfu*NYn(q?GuMX}8X=FWrA@I0o{{2qB)9dzo>kFas3%nE=<@#rx z~b0KU>V1!xo?=F)YrCo2rJAZQE2O4Lr51m^tq_Xdf_hg61<()8{t{geMV;yqH(! 
zj{6RV3TQ)+7Y3$a%hOK(6&rHbf&49kT@i~B9JdJ;Fdfq}1g5$XD{}kFUK!L;sf;4P zyYgWgkEpyv1|gWBrMh8G^$Xv}whPFhfbn4vykjni)u@C}0h1MwqADzw!c|Zx!sQZh zl>3g2yvP|&dAqk)uRX8BSE;61n@H6GQh=nIY9RKQ7_9JZ?{(*8w+GTf2n1GfndQxh zWss`hfkFyOVG)1ZSHOwzR}s2cx~N^2tP+45aRpppcQ{skJaHlb0qSP3h+rM)g3^N| zcC@Ix@t{{7XvBETLmZV7k!u{M_&s3-P=t^V;)z3&Sk)8-km(7i;J~vy=0q+29>zG5 zjtB{5o$J7ZeV}D|QDvmurO_oZ$oO3p172=^75_k2o?qI z34@v7m`E%s3rSCG=A&$oAVscwk?M3kx)p&i_E<97y^xl&sJ^Rl0jxV?m2iywqz6DK2}|KD2e*AJ}!XqiuYIN@LvE@q#78F<(J|8A4ke_K1d zTlL0H9oB!1TK#eV??E29{)5%uA3F(HzZaLI>bdBn+ zozO$!5w(PPMr9YJGv<^yci+>l8X)Zkvl%q6#c+^Hq}b+DOXh}rsV~;%U#z?3j9m@+ zUl(3Tbc*cwef;@(?5^=&+ioW3f6W@6|J8TukK_L#9$asSID9I|JMDJeuJ72*g1?x} zm-yD$u^WZOO?34iXwG_kKHp6^mlrrx|j!ON6^qlQ39ZP1wwH!(il(k(ev>IwRcpk{X6?Aa3noOR|~= zk;PVJPf6#atwVtWseHCe%yJ} zZL2-iNTeO^x9c_dAHBWQZ$OS8WsgU5?t0_pZCj5Z{5X^t%Kc{|yXrLO+VQ5StRhKu zuZMgdKndtku^NU2GZcd%jweb=WsO>p<>#M&#x(c?FC&_CaF>`>D!5zWVgFu%1P+(B z<09+Z(92N24KS*Uq4ymBsmwgzn>o|gb9zJ@e^vdI;#!x8YX`*%Zs{pGk}sNQ`-~*olwZRF54pNkzy^pV)rHt&+e{Pf z(jZKWZ6+@k`-v83-+Yts+F}lhGiQ}$&Nqn&ew^o3RN2hynl*Vx*n%}Bu<9$e*3L{oqEHDEZ?wqp4nRrQefVcX-#{}Zm_G+nNP00Um?rZ7L>}z0uKnVHkf(* z;nayLHM~~-M*ze-0ul{a|V*UGrDtvf~C%oby7Nxo>l|6xO ztDSm@jo7?QZ4b6KX_t}!m|L9H3CNJDmi1Lzn(I~*qrp}-k2*px5YJ0t2=YvC^`_ba zWP|Q)CTx37=RvgJ83(~MDo7c`lGz9cs|M8-(Ccu=ld?Dq z+y{x3f(7D3PsP)FzYq+fIqH*@D-_QA$R3+CZ({9Y+#v|cGDTLeEFc4B zObS$hI%=wsr4fTusAL`k;~^s6uf(AVO~^iKPFZZ#ba*9Yb(H;#;7C#BS{MluBw9-9 zBg_(51hDqRG%5xji`>wg#~CD$;DCW$MZGHUE7;x&8R$hKJ!Q+4;-=d=pZa$K1BF%2 zBvo3Hv!H?vLRBsufg%&AU4<$ z88gnD_qd8eWsVNG*CB&kQjxC&?4xnXgHR~osg=Qf%6LLr9Wp%TDs+E1J?VAXGk6M$ zbVhokA(d0qG*RP}vmvVDsC|QAfx&~O2!Ri(=MYzvYQluoqN#zJYr23GKo|m?4Hw>Y zM3$U-^6lyjW*95L+QAPh zByB1vVJe|nHm_!_t^GlmYcSSeK?>d%gLKw=0Zs;B6|}36t0_h z=!^0H%ff$`TBD{2zp|V>>b&S4;i})(s+o<;@(P-$w=FH2t@SyfE83eT%=^twnF&dD zd8LBwX-oEmR|3>l#kFFq8eV_*KcN{gVI+veya~d0VmVB-CsOLUI_E&0RY9;; zaT{@E#Nmp7tP(K`!_^0^C3FGS+223w_IhpUWzc$7dsh2whcR~59^{Cr&jp^IyUza_ zJGB45RomL#+1_r@`Tyhj-vd4W<-h;eum1B_U%hc$cG6=%sC`sS`|4ldzXtsG@9-bK 
z|KIz8?DYF*@+X$~Kk(mw%VhaaN&5dlA>hpCpfF9HD8|Jj?yYQ`_Auj<5C5C2QU2e* zME~=X+WGk8K6drgdH;$#ARhD7mCsVgo#Vf`lNtZbokr~|_WrZfe#i4qKK`3EHbVug zUEkeqHlH;b+ckT4cX#V~{n_(oVdptJI(*SN+k18Rwrjt4;yAR|4%ha-H5+?Q2cNy& zy&v8T>1U^~{hajxz|oI)0>}>bN0Wmu{^TyuK64#+j(_3*Z;bys2dE4z> z(k~0##jO+ap=`Gsb_0^+%BbQ_VF4Gn+s|w?63XA;Y`2@Z)2hF^LCD+fZTlIddGn>n z+-?H^tZWYJ4<|xPqitn&^rr@)F@x?{_#9EF*;|jI;LALzMe!G^A=?^k){1(ekWHx=B}N>X#n8gQ>Id@#sV68LMBvxqDhbAj(z$F4>O&$Mgfd^Qv6f)L~#}4imXs2vz#D@#?;)WFI zCPLob7FzX$AqchsF4+;z42@~zcpF^Zl2Vc?-#1$j`6?SY}C;IXS)dkpjoH+zh2vZ9RCmTY{>n- z&67FzU-b4{%;1OeHLKh8S`!9e{aLlPRjoaTpRKKWtI=TbFRc51&Nj_WbMm{zvr1{) zBh0WWH{J+sXnk^L7WN;~)U(IAJl>R57Pm{Z<30<>r(FbpfdWbNErNgJL@1o*9v)lL zK0KMrkR?rZ5ryJMq+jt@L0Sq&G2pKZwtQAf}HvVNJZD7gxx1EeJtf|ne~p=~=g5Go2p5C!BmMtDM>ruYz@ynW)vNN_41tBx{#<)d-) z-UJpTWVOCTbBQt4Bf{jsssrHvdzC0-Kt!~tRWPDkwjRZtOgwk6u2{yruZvRuE{K3z@iK#C7$sD&G`G6DYlNvzzM2-en7^u zUjygL&pEs+he~%gkC!SNA)g`@fQl0gqAK)3QfFZUGES!)@}BW%j-kWy2gz$54tW?v zN&pr%R7y@qsZS^25n8I83Y$|xS$*nz7!7R?kGdE0^rx3R?i}>`54{t}<3)Fcu(1}5`UKLR;1BK-WllS9GPaRr=C~q8oK%`Y)Rm?0i;5wV ze6i8FLydd^b5zcON&`dK_52d>l@;7L`38$bZd+)iekca*FF0C-waG-1UXt1* zu8j5|Z4o8BTR%jALT0se0USYBjfeRiLe7jN`LS4HRtor z6r%|m5Kv&6{?mvB%ncY?k$P^#0U31W+M|tXDo+ttK|x}*RPr8nJ51QDNkra1?h&+8 zZ$wj66}q05M)uqriJ2YkHWqW7*;~z83qv!ic9D1$$iR>a;(Z{l8wcT1PG)a~drb$Y z+LaN20GOA^$V7{VO@j})HRoiLh)9l*}LjtXi4vC1Cu zLq2z@uL)-uiICOhN*{nqI%8b7Y6mZWxQ{Zi_c2ZyIUS|?0BM-B4D9PQaa z{0*1drre2^zKhniIPb!s7zJ)*dqI`^Xs!bzsp`xKYae%7Bi^v}jK{HZYS=%S1?W;A?iRD4jz=TN?k9uiVXF1+UN)43;^ z%$cO^Mm?LQ1=cLQy`_Q=PIBmsY7FY8=MHtf#}p33XUaWvg&J zn%tc5YC%)Hh#-T`+aoG&L}bjtk*k<$8#qfTe`zYcTGc8%z*WRw5ri9py# zFi_HQJjc&Ba`=;;*kPQoN0N5QD*K{y4t)=ojOclROq)2>*{r{SZVvovFdnlp zcj5(V?#hHdMn&!CmxujV-J{d)+2HBV7(@sI3q^&TE-<~{#Oe%{t5_Ip)zoZ0dThrl<0RR<}7QJ9=sNf|m83bVv4lm_|HW zWzGt)IvIQK+t%yz7w8Sz@Ad}PchpjVibUQlQUpA#=Xd4;7L`$(RmSxahJF+gA0elR zV!SD!>F~%49~WLC!*??LA%%kaB^8oY;aI7Nj>`)`aHqdwr9$WJk2=;r(4$ zkG}tDXLqOZc>mKwJf`RHmtul@_5apx6D>iSHDUj?Q+w?Hhj>=}o4-IEciR8dx3l(t 
zyS3)y`tKp0)cWtVS8qSL`03)OTC;i4eD>Y;clF)I?5E407C&8V7U?30=qjw0e!BdN z)P1t)X4&R*EP9j8a{1HdO0g%Owxp1YsYk`+;~%?v*6r-S>Dsf=7pvor@!tTBA^V@L z9oz)k#rS{q-ADW12YEJ_vcpAJ_!oFOq~fx${V_=v-uR`xEMz-g#sDl#Ojk7 zL>GgN4Y@F{L9+(B0YUlDn$_uRA&k-ynz3xKp5CTE@`j-kQm8Dd(yR#s91J=4$y+%V zF=q_;+gHD`${wrY`n?(lRT1k@W=IN{{h-|6O@L_2P-V8Rz0d?AL==!7w-v02{kbqwNu&KlGM8?v%E%Lks%4A<}T zIYt0mP_(x3Lb|fkQLUo>I&L-Ll(9cR44^eT-W3tE0yXJpY2sr7`I-Hh{oFO*<=%C!h$w#KDzldd0xHLT>BPI)l5vwP<>L9^irXpMO2Te*bbFKzGdl^(LPG znCpMM|Lt-9e~^d#f4mX8$n5NFHJ?2jk9S5}jh*dhjoMaYIBfDQ;e4aqb`qI&nVQS> zlu0_oAxaie5awQrWd$+?%YUreaJRl7s(}UURp`uMlDs-v<3BWYhJ1>N;?)2m4Kxk~ z9iHvhx1IXd*md3AQQc|o*0!44&Dy$B`{@RXY+iIqQ>^q%l4<)g8XYcf%rDU9Jq%Zu zIAN)#D#t1zz3IY&#HGp&e6-~|52;nR1;{f(<|7bGzXtWEVb?0Rb_Khms|+~EYf(&udvpLxJ?YSL zU#5<`bNJh75n+|o4s)=_u< z-FbYRy~WaiVT@N24D_Yj_I&xtb=)rh8#}dTBXj<< zwe=YP<6)l6{U43T`#)~E|6_wyo>uS$q)v-1;&J5}rWE9DhPZ%MzoTWOe#Yo$Hgw!~ zV*iNyiy4!jy6{`1#!I?yP^NeMWtr`v*A_Vm?t3nf03G$p>|`$D1ee*lPaX&$bK`;l zawrx{HilfW^$?B}R9u-IsMMSipuc;r%KQdjB9Uvp964xxYXYLBh{Qc~ym-oEUD3(# z7ZAwDAV>&Hq+X%mMFSD+Wq@)OI=;>NXhm0u!(~fcPDyo99t8%nTENZ+0s-gpclHq5~+P9ZBK#JJ{9Q1ap(;fz}7{9 zvP6_j?^?*L^cs-6g?L|q*0}-=H3XcT+Fs^QZ_}$}~_U+`U(wv;JVOdvrwCi65L{ zD9<*`?Gv)sGwdnado&=>Gi z6p-PUNPpSI7^O$&Z;pHT?TSO?!E`ZG3g&+IpmToIU-?!j?1`W71&8znj_wO;B@Fw^ z%R)hx92_3+cYheX?*51{JdjfFvGjO;TrQMY z<_EZd6y!_z~QMXS`xz~1YIrkWwaI=K!}dj zBCN_us*27+h!%Ot_@ry2{SpHck>`V=2q>f;p2jFb7m*2Ci2$L-AA@44hkiexd%AH& zGgfwwcwvh!qJvf-wo)Ci#L|JP+{==RZ=C;@BzgGIsp3m z9fu{I!drkSt3dyBss+ivLO+=2D;Y@WHkf9h^Zs5L{kNAmfsogfgWhnEOz=|W0r80w zVRXf!*}O7PGqy^^oq6A(`H&_4bs%fDO6dkF-c$Kksb?pJi^yrERC%<py-49XIe#7U*3BnL|8WaXp9$2coI z_Dqh7+zFpK0NoP@CpH~24g{}=j{u@1N1T%PsH+tvoU(**VUdauS(hbKX39c%>6|H% z^yGwnZoFaNM&ep<(JJ|kCrwK?sce*P-L8v>c|#SH?I*9HFt~wMC4g+9naJIak^G+I z{npG&N{!j%(fC4VZOP)7s+Vx?UEdb^D%F=Vq5G@p8L(JEUdoeOm~ znd?Fb7bkLsZ*wMnLqWxZVjal=r)frG0hbqY+Ac9rkwM3N&6jT}5C})*RYH-BP0Ay& zG%CITQn3w=Uj801bv=UbsVx-g$1PM$WG^6n5pe|SDEU_cnyVzj#YR;$DwVX{q(-B6 
zK5mm4WNMjWQD>5pFOYS$_iOF%}F+8q_`+iaky-oIFu9k?Q~7B%AiOJ10q$M77$jXTrpEsP^r~C3DE0pX z!Awc01W6nB0AfM3qNbNvLBV`Ch=T#j$Xt~Sv{1Q-8^bz|f|bB3owAmRbB#Gk^)7rX zs4_I!4+gY^7!2SS^7mjs&Jr(qpewUhz>wI+1*6avZG~m21R`1b*;to_dz1)53X5uy z^tZ}F2()prS5|^yAa+W_HVP=HfZ`Zx;fA;3IKXzJG48w#lBG-$I5@>zO05w~2V74P z6=^Tv!pNsTUux=3xnI;$0mxI!D41HX@o7%(dXjrXYMG2eCs0HbiYS9JPfpJmpGnxx zVDO?a`7NR?1b7dUOOZe_C_sBPx2x*qH{vXo@J*sbiLiZbwq2{$GM_vYY2FAKTl9;e#I*0&6CtqedaW#X zK-#rZA>B*nG>)rgvE)(RJ4iG!iE{q3QLTHI3BM#I*XfJ8oZO|->u~H~gY$@JYD&vn z$m51z?pBCxY&qC8wW(>3*q!EQq3#+}y_S(?lGqX@B?X5CTiJ0aNZmx3W20!sD`Ff& zEl1U159QPz7nd6QYD|n5^1dI+sHBfte_Z3%rA6whT$xy+TxUA9P_;k_AhcQhsNXJ( zY?tOZek*%W+ns_)a%t0lNiL7&!-cLhWNjEmQhW zrz@4{KDNc>vBlpEj&6w!>kq5#(o?f)6A`(@{>*A(>?Ycf`K;``nUAG-tEKwCoS#od zTVA8emDZFBIo&Spa5UN@O)ysa%LI02y9P zZ3LIuZy!o4$yCrPnNnm0EDDg7#IM_E!c{SANVA>s#hmt5H}6-Ps1*LTAvDZ-E-x&V z#kyRjkg5CM0>aF88yMB%dw|mP%z*m{iI%qn6LQR`TR<04jKAEDSbV;two14>4Oq^ij1P{{%zFK?o$-_7Emz~1s)G0oc;Vo{I&FHC>qpmR+Ze1s%Q zQALO)gSJLhFd@)qm<5!aTANXPH+n=<>2Pc;MKDwtEU`hYrpw`hO1_D@Sd98IAS}uT zDeIWre$JE8c(z{5ZdRA>!JQu^NWUzHtrMWDr?U5!MYl;cx!cmp64N}v3u0%wXJo^! 
zU&$rAy_M|)k#6P`oV1crbe56tKGKr}03W?T=_qA^@$q)inJ0BW6l4_M&S)e(--t(& zNk^Ryr@Z5YEri+!rMq576pRsor3`oy2Rl#tNT~Z@F`N)?W1|e8E$2&tRg3KrXxH&+ zBYiOxRaq&h7AvAtB^|F>9e)YRZ(^N6`%F4(F;%eC!f}b^T5=QpV&+Az7`v8bb&ON2 z#JW&hgkgY5moBVz$dpT6_WN(WlVe?nv}g!)oE^0j9ZBNN@cdQafH%xZYp&|GKeBbF ze@iFxAn<|(x}e(%!Gf`UHjrn<(s&8h1h+zyD};eSM-?uL6NQoI#&Y0Mn4iInLvP_O zSYjqsw4m%fq#-x3WNz9SrVSVg7?-o-G7H7F92zFkuk<#$Y|S2R+|Y#KC^Cr{^k{cI z7e;r$b-Ik)4n>;Fe0b_p7Mnyta$tMo2z3hggnZgPv(0)<#W%#d5|az{2yArHY$SIf z&7#IhKv-t+Vm{@Wv*`^qB6KiT07+MMH;~9H41uv5h<-9I@z@Ka7#7Xsnwv(cEYJvz z`$y|rPc*dqp=Z} zZQXn!f)tDPpwSCVU7!m@;(Q#8IV!2(a&f z&{>2@R4o*-feyAMB@J>Y9)}EM`aowd%9NJ0Bg|OeP!FeV_R@zDLOj!ONaSTZPY4-{ zb5W(OE}BZDOkxUok)qvQ!&+xnW(0X?-HA6=Pck9tigF?XjdcWxhoW^V&Vv(i`39s~ zADOwUdqdDH_hdss5pKp93k;V;Kbu$F3&Q`5x)8>MPHNKq2ax~PmdTILLp&+_|1T5@ zLXV37_a&nMef;=G51_~K{}9jP`2XjA z)c7}p76~`k&vy~{UE{yDm5u+=Y;HaJe?Q1GcA#N%@1irEUbfpcyI$Y6w+i!U5qnd- zwfnjK>~ZM*13zl~yW-6F1}}Jlb@JzM`_<3wh-ZVP9e}s) z=>WXW1NcrC;7|7fUWd`=@dCcv4OqIShFm-NqC1XsUX3SHZ>YRiPcd4)$|YoKc9Xn7 zx2oY4TYC0eOo8x||Lu7EL>+tyLzQg03d9l@EhX8t(%`Wc1J_9ORXiMM<)Q7mPB%6x9F{{IuP1 zr1CAIEKZpBuZtjd?uH?;M4^Uv0{2C1+MC@~RdmJeC@1GA~jX{z>|E zrHklhwgD~$jU9hFP9B6&B+H^vPudc}!y^LpDiiBd`T;T{Q?2p?`^mRTbnXS|DF-86 zkyX0PTISRY7u6ip(ApiwcnVcjAG1mqmHOq%!Hv$mLew-Hts$y9{vk>d-O%mUbBk+O z*EnxXSZLQ_ObOi?c`i}nzRbLQOQt8MlGb^<6=!6fB zXAD(^<5>_3-=boU4>LYAd0kPmB%CO3fM`J{kj`n0Ysh%zqeE#CmN9#4EEeb%%CGzR)s4w(OYV{-b$;zIjUSr zTQXoLle>|aDV45E#0W7GvKj<5JRT%n6GuV}L%Et=TbI{e%uJarGv8OG$k(*jwb{;x zR=W3Oo2f2Q`?TNNT~%iwXVr$ZF0bLG>As@z+f+D9g_-9Uvly0h>sckh{YQeRE5s2m zCeSrB2EEbzoTUSM3aimtnKd#xg8-v*W3;MU`Xi&m+}0l}`cB$(e-%`lRqDBOHX%{W z?I>dbX~@cz*cdCy(OrnAHL22itg_9(Zb-hQz~mDC9vlD*_7JiRa4!Tj=h<8jWK6N~ zAL?Gc@sR4S)!~0z!&fxCbf<};UNj(kFJNv5gYV2}rJbR;M4Dh_1k`H(h#K+sW0aaX zSXb_lYAI+)5e;k&@m2t)Me40>$?->|9#2}4aDm1}kI%J(P7YCnQ24AYvDZ%mYubAM zi6M`$x-?$WA?~=R+a5r774$&OgpcULdnQGovc8Q2X(8^#x5zU=hhmD#pf(0KJQSbW zK$9{9#YHC&nM!G4md}=YP+=6MHS^dyib@kp>12N!>158^Hk)KC$pEw^m?PCloo2LC 
zrrk0v%$)Z{14xFO1c%N`Wr_O)Gk?-d`^+}#P&VTXTH*5c~)qFhvd5~x0j~)N~bH;y_Cpk{E^j$BI zL0511FhX0Em;xt?BN83I6bRsR3KO_D#aQGgugfcGa>+=*GRLqAjF`+-52LCO9dH0$AHy~K`ETQ66r;q10f^^ z;s!cUU^MlHhVghwRqBjJ3XX&V2;r^jZPjoT>U!rYnOM?G{A?E$VgAodJtY;!c3lU^ zkgozS>26W9L~bUtGb6t(`kUb~koT+LU-FI_ayASDXM|Dbw6S=Cd1{iyx%V&7UR$&C z0P2f*(W>4=P?UI{CbT3244ip@dzCt=l`w~#lARj|opn?O8+3PQyP6L;5oYPiR+;!m&lBwBhF>}IRnU>eZG{CjlEgsiBhiE(UkLf zv6+e{lPfe5I%T*eeL`U}h6}=JF@yeJ_^tp6oRP+sLYuaKvs_&zwpf#294mjwmJxAZ zMbN9vdB~b3oV+%}xa6E^gSZB{Tg3@_%3-{!ix+mcpJy9R;cD;E(0exS(fi@?d3+w9 QfBfhF1CSStx&Z0`006wA3jhEB diff --git a/public_dropin_notebook_environments/python311_notebook_base/Dockerfile b/public_dropin_notebook_environments/python311_notebook_base/Dockerfile index 0e7211fc7..d43388b02 100644 --- a/public_dropin_notebook_environments/python311_notebook_base/Dockerfile +++ b/public_dropin_notebook_environments/python311_notebook_base/Dockerfile @@ -74,7 +74,7 @@ RUN python3.11 -m venv ${VENV_PATH} && pip3 install -U pip setuptools WORKDIR ${WORKDIR} # Install git helper binary used for private git authentication in Notebooks/Codepaces -RUN curl -L -o drgithelper https://github.com/datarobot-oss/drgithelper/releases/download/v0.0.13/drgithelper && chmod +x drgithelper +RUN curl -L -o drgithelper https://github.com/datarobot-oss/drgithelper/releases/download/v0.0.16/drgithelper && chmod +x drgithelper COPY ./agent/agent.py ./agent/cgroup_watchers.py ${AGENTDIR}/ COPY ./jupyter_kernel_gateway_config.py ./start_server.sh ${WORKDIR}/ diff --git a/public_dropin_notebook_environments/python311_notebook_base/env_info.json b/public_dropin_notebook_environments/python311_notebook_base/env_info.json index d6930ce78..d9dd0c4f9 100644 --- a/public_dropin_notebook_environments/python311_notebook_base/env_info.json +++ b/public_dropin_notebook_environments/python311_notebook_base/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create Python 3.11 notebook environments.", "programmingLanguage": 
"python", "label": "", - "environmentVersionId": "68827e0dfccc7df6685c1899", + "environmentVersionId": "689f735077c7c911fc10b5c1", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_notebook_environments/python311_notebook_base", "imageRepository": "env-notebook-python311-notebook-base", "tags": [ - "v11.2.0-68827e0dfccc7df6685c1899", - "68827e0dfccc7df6685c1899", + "v11.2.0-689f735077c7c911fc10b5c1", + "689f735077c7c911fc10b5c1", "v11.2.0-latest" ] } diff --git a/public_dropin_notebook_environments/python311_notebook_base/start_server.sh b/public_dropin_notebook_environments/python311_notebook_base/start_server.sh index 738f1ba41..569dc8775 100644 --- a/public_dropin_notebook_environments/python311_notebook_base/start_server.sh +++ b/public_dropin_notebook_environments/python311_notebook_base/start_server.sh @@ -29,7 +29,9 @@ cp -L /var/run/notebooks/ssh/authorized_keys/notebooks /etc/authorized_keys/ && mkdir /etc/ssh/keys && cp -L /var/run/notebooks/ssh/keys/ssh_host_* /etc/ssh/keys/ && chmod 600 /etc/ssh/keys/ssh_host_* nohup /usr/sbin/sshd -D & -# Initialize the git helper. Turn on/off features dependent on `GITHELPER_*` env vars +# Ensure proper permissions on the directory used by cache daemon to ensure it starts (create dir. if needed) +mkdir -p /home/notebooks/storage/.cache/git/credential && chmod 700 /home/notebooks/storage/.cache/git/credential +# Initialize the git helper. Features are turned on/off dependent on `GITHELPER_*` env vars /etc/system/kernel/drgithelper configs set # no trailing slash in the working dir path From 7b847f6810d99c1584519fc2388674a2bf0b2e44 Mon Sep 17 00:00:00 2001 From: Nickolai Novik <92932793+nickolai-dr@users.noreply.github.com> Date: Mon, 18 Aug 2025 09:02:32 -0400 Subject: [PATCH 16/36] Add OTEL logging configuration, refactor traces and metrics. 
(#1626) --- .../datarobot_drum/drum/common.py | 65 ++++++++++++++----- .../datarobot_drum/drum/main.py | 5 +- .../drum/root_predictors/drum_inline_utils.py | 4 +- .../datarobot_drum/drum/runtime.py | 4 +- tests/unit/datarobot_drum/drum/test_main.py | 2 +- 5 files changed, 56 insertions(+), 24 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/common.py b/custom_model_runner/datarobot_drum/drum/common.py index d9a8765b0..a549217f2 100644 --- a/custom_model_runner/datarobot_drum/drum/common.py +++ b/custom_model_runner/datarobot_drum/drum/common.py @@ -25,6 +25,13 @@ from opentelemetry import trace, context, metrics from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter + +from opentelemetry._logs import set_logger_provider +from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor + from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.resources import Resource @@ -139,6 +146,38 @@ def make_otel_endpoint(datarobot_endpoint): return result +def _setup_otel_logging(resource, multiprocessing=False): + logger_provider = LoggerProvider(resource=resource) + set_logger_provider(logger_provider) + exporter = OTLPLogExporter() + if multiprocessing: + logger_provider.add_log_record_processor(SimpleLogRecordProcessor(exporter)) + else: + logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter)) + handler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider) + logging.getLogger().addHandler(handler) + return logger_provider + + +def _setup_otel_metrics(resource): + metric_exporter = 
OTLPMetricExporter() + metric_reader = PeriodicExportingMetricReader(metric_exporter) + metric_provider = MeterProvider(metric_readers=[metric_reader], resource=resource) + metrics.set_meter_provider(metric_provider) + return metric_provider + + +def _setup_otel_tracing(resource, multiprocessing=False): + otlp_exporter = OTLPSpanExporter() + trace_provider = TracerProvider(resource=resource) + if multiprocessing: + trace_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter)) + else: + trace_provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) + trace.set_tracer_provider(trace_provider) + return trace_provider + + def setup_otel(runtime_parameters, options): """Setups OTEL tracer. @@ -161,23 +200,13 @@ def setup_otel(runtime_parameters, options): # https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ if runtime_parameters.has("OTEL_SDK_DISABLED") and os.environ.get("OTEL_SDK_DISABLED"): log.info("OTEL explictly disabled") - return (None, None) + return (None, None, None) endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT") if not endpoint: log.info("OTEL is not configured") - return (None, None) - - resource = Resource.create() - # OTEL metrics setup. - metric_exporter = OTLPMetricExporter() - metric_reader = PeriodicExportingMetricReader(metric_exporter) - metric_provider = MeterProvider(metric_readers=[metric_reader], resource=resource) - metrics.set_meter_provider(metric_provider) + return (None, None, None) - # OTEL traces setup. - otlp_exporter = OTLPSpanExporter() - trace_provider = TracerProvider(resource=resource) # In case of NIM flask server is configured to run in multiprocessing # mode that uses fork. Since BatchSpanProcessor start background thread # with bunch of locks, OTEL simply deadlocks and does not offlooad any @@ -185,15 +214,15 @@ def setup_otel(runtime_parameters, options): # missing due to process exits before all data offloaded. 
In forking # case we use SimpleSpanProcessor (mostly NIMs) otherwise BatchSpanProcessor # (most frequent case) - if options.max_workers > 1: - trace_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter)) - else: - trace_provider.add_span_processor(BatchSpanProcessor(otlp_exporter)) + multiprocessing = options.max_workers > 1 - trace.set_tracer_provider(trace_provider) + resource = Resource.create() + trace_provider = _setup_otel_tracing(resource=resource, multiprocessing=multiprocessing) + logger_provider = _setup_otel_logging(resource=resource, multiprocessing=multiprocessing) + metric_provider = _setup_otel_metrics(resource=resource) log.info(f"OTEL is configured with endpoint: {endpoint}") - return trace_provider, metric_provider + return trace_provider, metric_provider, logger_provider @contextmanager diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 3f6ad281c..379e6914e 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -72,6 +72,8 @@ def signal_handler(sig, frame): runtime.trace_provider.shutdown() if runtime.metric_provider is not None: runtime.metric_provider.shutdown() + if runtime.log_provider is not None: + runtime.log_provider.shutdown() os._exit(130) @@ -82,9 +84,10 @@ def signal_handler(sig, frame): print(str(exc)) exit(255) - trace_provider, metric_provider = setup_otel(RuntimeParameters, options) + trace_provider, metric_provider, log_provider = setup_otel(RuntimeParameters, options) runtime.trace_provider = trace_provider runtime.metric_provider = metric_provider + runtime.log_provider = log_provider signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py index 2c9e9de6c..1837e72af 100644 --- 
a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py @@ -77,7 +77,7 @@ def drum_inline_predictor( print(str(exc)) exit(255) - trace_provider, metric_provider = setup_otel(RuntimeParameters, options) + trace_provider, metric_provider, log_provider = setup_otel(RuntimeParameters, options) runtime.cm_runner = CMRunner(runtime) params = runtime.cm_runner.get_predictor_params() predictor = GenericPredictorComponent(params) @@ -87,3 +87,5 @@ def drum_inline_predictor( trace_provider.shutdown() if metric_provider is not None: metric_provider.shutdown() + if log_provider is not None: + log_provider.shutdown() diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index 4e713a337..0ecd72fb8 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -5,7 +5,6 @@ Released under the terms of DataRobot Tool and Utility Agreement. 
""" import logging -import traceback from datarobot_drum.drum.server import ( empty_api_blueprint, @@ -16,9 +15,7 @@ from datarobot_drum.drum.enum import LOGGER_NAME_PREFIX, RunMode from datarobot_drum.drum.exceptions import DrumCommonException -from datarobot_drum.drum.args_parser import ArgumentsOptions -from termcolor import colored logger = get_drum_logger(__name__) logger.setLevel(logging.ERROR) @@ -33,6 +30,7 @@ def __init__(self, app): # OTEL services self.trace_provider = None self.metric_provider = None + self.log_provider = None self.app = app def __enter__(self): diff --git a/tests/unit/datarobot_drum/drum/test_main.py b/tests/unit/datarobot_drum/drum/test_main.py index 5c321a20f..fd5c05058 100644 --- a/tests/unit/datarobot_drum/drum/test_main.py +++ b/tests/unit/datarobot_drum/drum/test_main.py @@ -35,7 +35,7 @@ def test_custom_model_workers( runtime_params.has.return_value = False with patch("datarobot_drum.drum.main.setup_otel") as setup_otel_mock: - setup_otel_mock.return_value = (None, None) + setup_otel_mock.return_value = (None, None, None) main() runtime_params.has.assert_any_call("CUSTOM_MODEL_WORKERS") From df74c732adeffde92e18582a8b995ecd85c8451f Mon Sep 17 00:00:00 2001 From: Nickolai Novik <92932793+nickolai-dr@users.noreply.github.com> Date: Tue, 19 Aug 2025 09:41:49 -0400 Subject: [PATCH 17/36] Bump DRUM version. (#1631) --- custom_model_runner/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/custom_model_runner/CHANGELOG.md b/custom_model_runner/CHANGELOG.md index e3e1772b1..d07a9dffd 100644 --- a/custom_model_runner/CHANGELOG.md +++ b/custom_model_runner/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +#### [1.16.23] - 2025-08-18 +##### Changed +- Add OTEL metrics and logs configuration. 
+ #### [1.16.22] - 2025-08-12 ##### Changed - Add support for kwargs and headers to generative ai chat models From 98d7b141aab52f340ca981a9bfa3ea32cafb8451 Mon Sep 17 00:00:00 2001 From: Aaron Ball Date: Fri, 22 Aug 2025 14:31:38 -0600 Subject: [PATCH 18/36] [RAPTOR-13851] pytorch: rebuild requirements to pull in updates (#1637) * [RAPTOR-13851] pytorch: rebuild requirements to pull in updates This rebuilds requirements.txt to pull in updates for pytorch to resolve CVE-2025-3730. * Reconcile dependencies, updated IDs, tags --------- Co-authored-by: svc-harness-git2 --- .../python3_pytorch/env_info.json | 6 +- .../python3_pytorch/requirements.txt | 107 +++++++++--------- 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/public_dropin_environments/python3_pytorch/env_info.json b/public_dropin_environments/python3_pytorch/env_info.json index 12f121211..56a640b79 100644 --- a/public_dropin_environments/python3_pytorch/env_info.json +++ b/public_dropin_environments/python3_pytorch/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create artifact-only PyTorch custom models. 
This environment contains PyTorch and requires only your model artifact as a .pth file, any other code needed to deserialize your model, and optionally a custom.py file.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689b9c1c00358f486400449a", + "environmentVersionId": "68a8911901dc4d0f6b00c32c", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -14,8 +14,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python3_pytorch", "imageRepository": "env-python-pytorch", "tags": [ - "v11.2.0-689b9c1c00358f486400449a", - "689b9c1c00358f486400449a", + "v11.2.0-68a8911901dc4d0f6b00c32c", + "68a8911901dc4d0f6b00c32c", "v11.2.0-latest" ] } diff --git a/public_dropin_environments/python3_pytorch/requirements.txt b/public_dropin_environments/python3_pytorch/requirements.txt index 5e0b7eb17..8bc62c54e 100644 --- a/public_dropin_environments/python3_pytorch/requirements.txt +++ b/public_dropin_environments/python3_pytorch/requirements.txt @@ -6,37 +6,36 @@ # annotated-types==0.7.0 argcomplete==3.6.2 -azure-core==1.34.0 -azure-identity==1.23.0 +azure-core==1.35.0 +azure-identity==1.24.0 azure-storage-blob==12.19.0 blinker==1.9.0 -boto3==1.38.23 -botocore==1.38.23 +boto3==1.40.15 +botocore==1.40.15 cachetools==5.5.2 -certifi==2025.4.26 +certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.2.1 -cryptography==45.0.3 -datarobot==3.7.1 -datarobot-drum==1.16.19 +cryptography==45.0.6 +datarobot==3.8.2 +datarobot-drum==1.16.23 datarobot-mlops==11.1.0 datarobot-storage==2.2.0 -deprecated==1.2.18 docker==7.1.0 filechunkio==1.8 -filelock==3.18.0 -flask==3.1.1 -fsspec==2025.5.1 -google-api-core==2.25.0rc1 -google-auth==2.40.2 +filelock==3.19.1 +flask==3.1.2 +fsspec==2025.7.0 +google-api-core==2.25.1 +google-auth==2.40.3 google-cloud-core==2.4.3 google-cloud-storage==2.19.0 google-crc32c==1.7.1 google-resumable-media==2.7.2 
googleapis-common-protos==1.70.0 idna==3.10 -importlib-metadata==8.6.1 +importlib-metadata==8.7.0 isodate==0.7.2 itsdangerous==2.2.0 jinja2==3.1.6 @@ -46,59 +45,59 @@ julia==0.5.7 markupsafe==3.0.2 memory-profiler==0.61.0 mpmath==1.3.0 -msal==1.32.3 +msal==1.33.0 msal-extensions==1.3.1 mypy-extensions==1.1.0 networkx==3.4.2 numpy==2.2.6 -nvidia-cublas-cu12==12.6.4.1 -nvidia-cuda-cupti-cu12==12.6.80 -nvidia-cuda-nvrtc-cu12==12.6.77 -nvidia-cuda-runtime-cu12==12.6.77 -nvidia-cudnn-cu12==9.5.1.17 -nvidia-cufft-cu12==11.3.0.4 -nvidia-cufile-cu12==1.11.1.6 -nvidia-curand-cu12==10.3.7.77 -nvidia-cusolver-cu12==11.7.1.2 -nvidia-cusparse-cu12==12.5.4.2 -nvidia-cusparselt-cu12==0.6.3 -nvidia-nccl-cu12==2.26.2 -nvidia-nvjitlink-cu12==12.6.85 -nvidia-nvtx-cu12==12.6.77 -opentelemetry-api==1.33.1 -opentelemetry-exporter-otlp-proto-common==1.33.1 -opentelemetry-exporter-otlp-proto-http==1.33.1 -opentelemetry-instrumentation==0.54b1 -opentelemetry-instrumentation-aiohttp-client==0.54b1 -opentelemetry-instrumentation-requests==0.54b1 -opentelemetry-proto==1.33.1 -opentelemetry-sdk==1.33.1 -opentelemetry-semantic-conventions==0.54b1 -opentelemetry-util-http==0.54b1 -orjson==3.10.18 +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.10.2.21 +nvidia-cufft-cu12==11.3.3.83 +nvidia-cufile-cu12==1.13.1.3 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-cusparselt-cu12==0.7.1 +nvidia-nccl-cu12==2.27.3 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvtx-cu12==12.8.90 +opentelemetry-api==1.36.0 +opentelemetry-exporter-otlp-proto-common==1.36.0 +opentelemetry-exporter-otlp-proto-http==1.36.0 +opentelemetry-instrumentation==0.57b0 +opentelemetry-instrumentation-aiohttp-client==0.57b0 +opentelemetry-instrumentation-requests==0.57b0 +opentelemetry-proto==1.36.0 +opentelemetry-sdk==1.36.0 +opentelemetry-semantic-conventions==0.57b0 
+opentelemetry-util-http==0.57b0 +orjson==3.11.2 packaging==25.0 -pandas==2.2.3 +pandas==2.3.2 pillow==11.3.0 -progress==1.6 +progress==1.6.1 proto-plus==1.26.1 -protobuf==5.29.5 +protobuf==6.32.0 psutil==7.0.0 py4j==0.10.9.9 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 -pydantic==2.11.5 +pydantic==2.11.7 pydantic-core==2.33.2 pyjwt[crypto]==2.10.1 python-dateutil==2.9.0.post0 pytz==2025.2 pyyaml==6.0.2 -requests==2.32.4 +requests==2.32.5 requests-toolbelt==1.0.0 rsa==4.9.1 ruamel-yaml==0.17.4 -s3transfer==0.13.0 -scikit-learn==1.6.1 +s3transfer==0.13.1 +scikit-learn==1.7.1 scipy==1.15.3 six==1.17.0 strenum==0.4.15 @@ -107,16 +106,16 @@ sympy==1.14.0 termcolor==3.1.0 texttable==1.7.0 threadpoolctl==3.6.0 -torch==2.7.0 +torch==2.8.0 trafaret==2.1.1 -triton==3.3.0 -typing-extensions==4.13.2 +triton==3.4.0 +typing-extensions==4.14.1 typing-inspection==0.4.1 tzdata==2025.2 urllib3==2.5.0 werkzeug==3.1.3 -wrapt==1.17.2 -zipp==3.22.0 +wrapt==1.17.3 +zipp==3.23.0 # The following packages are considered to be unsafe in a requirements file: # setuptools From bd34a1ea0971942f00d7d9dadfdebe0b5a68ccca Mon Sep 17 00:00:00 2001 From: Nickolai Novik <92932793+nickolai-dr@users.noreply.github.com> Date: Mon, 25 Aug 2025 12:43:05 -0400 Subject: [PATCH 19/36] Avoid infinite recursion in logs. 
(#1638) --- .../datarobot_drum/drum/common.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/common.py b/custom_model_runner/datarobot_drum/drum/common.py index a549217f2..5dc168c01 100644 --- a/custom_model_runner/datarobot_drum/drum/common.py +++ b/custom_model_runner/datarobot_drum/drum/common.py @@ -146,6 +146,13 @@ def make_otel_endpoint(datarobot_endpoint): return result +class _ExcludeOtelLogsFilter(logging.Filter): + """A logging filter to exclude logs from the opentelemetry library.""" + + def filter(self, record: logging.LogRecord) -> bool: + return not record.name.startswith("opentelemetry") + + def _setup_otel_logging(resource, multiprocessing=False): logger_provider = LoggerProvider(resource=resource) set_logger_provider(logger_provider) @@ -155,6 +162,8 @@ def _setup_otel_logging(resource, multiprocessing=False): else: logger_provider.add_log_record_processor(BatchLogRecordProcessor(exporter)) handler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider) + # Remove own logs to avoid infinite recursion if endpoint is not available + handler.addFilter(_ExcludeOtelLogsFilter()) logging.getLogger().addHandler(handler) return logger_provider @@ -198,7 +207,7 @@ def setup_otel(runtime_parameters, options): # Can be used to disable OTEL reporting from env var parameters # https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ - if runtime_parameters.has("OTEL_SDK_DISABLED") and os.environ.get("OTEL_SDK_DISABLED"): + if runtime_parameters.has("OTEL_SDK_DISABLED") and runtime_parameters.get("OTEL_SDK_DISABLED"): log.info("OTEL explictly disabled") return (None, None, None) @@ -218,8 +227,16 @@ def setup_otel(runtime_parameters, options): resource = Resource.create() trace_provider = _setup_otel_tracing(resource=resource, multiprocessing=multiprocessing) - logger_provider = _setup_otel_logging(resource=resource, 
multiprocessing=multiprocessing) - metric_provider = _setup_otel_metrics(resource=resource) + + logger_provider = None + metric_provider = None + # Temporary gate until we have the feature fully enabled on main environments, + # to avoid noisy otel logs. + if runtime_parameters.has("DR_OTEL_METRICS_LOGS_ENABLED") and runtime_parameters.get( + "DR_OTEL_METRICS_LOGS_ENABLED" + ): + logger_provider = _setup_otel_logging(resource=resource, multiprocessing=multiprocessing) + metric_provider = _setup_otel_metrics(resource=resource) log.info(f"OTEL is configured with endpoint: {endpoint}") return trace_provider, metric_provider, logger_provider From f17efc85b24f0c959b9c699f2562982130b6c760 Mon Sep 17 00:00:00 2001 From: Nickolai Novik <92932793+nickolai-dr@users.noreply.github.com> Date: Mon, 25 Aug 2025 14:48:26 -0400 Subject: [PATCH 20/36] Update version for new release. (#1639) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nickolai <nickolai@datarobot.com> --- custom_model_runner/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/custom_model_runner/CHANGELOG.md b/custom_model_runner/CHANGELOG.md index d07a9dffd..7676659c7 100644 --- a/custom_model_runner/CHANGELOG.md +++ b/custom_model_runner/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +#### [1.16.24] - 2025-08-25 +##### Changed +- Fixed issue with OTEL logs infinite recursion. + #### [1.16.23] - 2025-08-18 ##### Changed - Add OTEL metrics and logs configuration. 
From f5e64fd2428935507b1ea49488ad08c2403975c6 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov <86368350+s-gavrenkov@users.noreply.github.com> Date: Thu, 28 Aug 2025 21:07:46 -0400 Subject: [PATCH 21/36] [RAPTOR-14353] add client and NIM timeouts (#1640) * addd timeouts * fiz black * fix tests * fixed black * add timeout * fixed * replaced with RuntimeParameters * removed unused import * replaced with static methods --- .../drum/gpu_predictors/base.py | 16 ++++++++- .../drum/root_predictors/prediction_server.py | 36 +++++++++++++++++-- .../drum/test_prediction_server.py | 27 +++++++++++--- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py b/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py index 8b09f0f8c..dd46c3a08 100644 --- a/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py +++ b/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py @@ -189,6 +189,17 @@ def supported_payload_formats(self): formats.add(PayloadFormat.CSV) return formats + @staticmethod + def get_drum_openai_client_timeout(): + """ + Returns the timeout value (in seconds) for the OpenAI client. + Checks the 'DRUM_OPENAI_CLIENT_TIMEOUT' runtime parameter; defaults to 3600 if not set. 
+ """ + timeout = 3600 + if RuntimeParameters.has("DRUM_OPENAI_CLIENT_TIMEOUT"): + timeout = int(RuntimeParameters.get("DRUM_OPENAI_CLIENT_TIMEOUT")) + return timeout + def configure(self, params): super().configure(params) self.python_model_adapter = PythonModelAdapter( @@ -211,8 +222,11 @@ def configure(self, params): self._openai_server_ready_sentinel = Path(self._code_dir) / ".server_ready" self._is_shutting_down = Event() self.openai_process = DrumServerProcess() + self.ai_client = OpenAI( - base_url=f"http://{self.openai_host}:{self.openai_port}/v1", api_key="fake" + base_url=f"http://{self.openai_host}:{self.openai_port}/v1", + api_key="fake", + timeout=self.get_drum_openai_client_timeout(), ) # In multi-container deployments DRUM does not manage OpenAI server processes. diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 442070a17..8ca14f063 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -5,14 +5,18 @@ Released under the terms of DataRobot Tool and Utility Agreement. 
""" import logging +import os import sys from pathlib import Path import requests from flask import Response, jsonify, request from werkzeug.exceptions import HTTPException +from werkzeug.serving import WSGIRequestHandler from opentelemetry import trace + +from datarobot_drum import RuntimeParameters from datarobot_drum.drum.description import version as drum_version from datarobot_drum.drum.enum import ( FLASK_EXT_FILE_NAME, @@ -52,6 +56,12 @@ tracer = trace.get_tracer(__name__) +class TimeoutWSGIRequestHandler(WSGIRequestHandler): + timeout = 3600 + if RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT"): + timeout = int(RuntimeParameters.get("DRUM_CLIENT_REQUEST_TIMEOUT")) + + class PredictionServer(PredictMixin): def __init__(self, params: dict, app=None): self._params = params @@ -137,6 +147,17 @@ def _post_predict_and_transform(self): self._stats_collector.disable() self._stdout_flusher.set_last_activity_time() + @staticmethod + def get_nim_direct_access_request_timeout(): + """ + Returns the timeout value for NIM direct access requests. + Checks the 'NIM_DIRECT_ACCESS_REQUEST_TIMEOUT' runtime parameter; if not set, defaults to 3600 seconds. 
+ """ + timeout = 3600 + if RuntimeParameters.has("NIM_DIRECT_ACCESS_REQUEST_TIMEOUT"): + timeout = int(RuntimeParameters.get("NIM_DIRECT_ACCESS_REQUEST_TIMEOUT")) + return timeout + def materialize(self): model_api = base_api_blueprint(self._terminate, self._predictor) @@ -247,12 +268,12 @@ def forward_request(path): openai_host = self._predictor.openai_host openai_port = self._predictor.openai_port - resp = requests.request( method=request.method, url=f"http://{openai_host}:{openai_port}/{path.rstrip('/')}", headers=request.headers, params=request.args, + timeout=self.get_nim_direct_access_request_timeout(), data=request.get_data(), allow_redirects=False, ) @@ -305,7 +326,18 @@ def _run_flask_app(self, app): if self.app: pass else: - app.run(host, port, threaded=False, processes=processes) + # Configure the server with timeout settings + app.run( + host=host, + port=port, + threaded=False, + processes=processes, + **( + {"request_handler": TimeoutWSGIRequestHandler} + if RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT") + else {} + ), + ) except OSError as e: raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port)) diff --git a/tests/unit/datarobot_drum/drum/test_prediction_server.py b/tests/unit/datarobot_drum/drum/test_prediction_server.py index eba3ed8c6..7e57687f0 100644 --- a/tests/unit/datarobot_drum/drum/test_prediction_server.py +++ b/tests/unit/datarobot_drum/drum/test_prediction_server.py @@ -15,7 +15,10 @@ from datarobot_drum.drum.enum import RunLanguage, TargetType from datarobot_drum.drum.lazy_loading.lazy_loading_handler import LazyLoadingHandler -from datarobot_drum.drum.root_predictors.prediction_server import PredictionServer +from datarobot_drum.drum.root_predictors.prediction_server import ( + PredictionServer, + TimeoutWSGIRequestHandler, +) from datarobot_drum.drum.server import HEADER_REQUEST_ID from tests.unit.datarobot_drum.drum.chat_utils import create_completion, create_completion_chunks from 
tests.unit.datarobot_drum.drum.helpers import MODEL_ID_FROM_RUNTIME_PARAMETER @@ -231,8 +234,15 @@ def chat_hook(completion_request, model): assert exc_info.value.response.json()["error"] == "Error" -@pytest.mark.parametrize("processes_param, expected_processes", [(None, 1), (10, 10)]) -def test_run_flask_app(processes_param, expected_processes): +@pytest.mark.parametrize( + "processes_param, expected_processes, request_timeout", [(None, 1, None), (10, 10, 600)] +) +def test_run_flask_app(processes_param, expected_processes, request_timeout): + if request_timeout: + os.environ[ + "MLOPS_RUNTIME_PARAM_DRUM_CLIENT_REQUEST_TIMEOUT" + ] = f'{{"type": "numeric", "payload": {request_timeout}}}' + params = { "host": "localhost", "port": "6789", @@ -248,7 +258,16 @@ def test_run_flask_app(processes_param, expected_processes): app = Mock() server._run_flask_app(app) - app.run.assert_called_with("localhost", "6789", threaded=False, processes=expected_processes) + called_kwargs = { + "host": "localhost", + "port": "6789", + "threaded": False, + "processes": expected_processes, + } + if request_timeout: + called_kwargs["request_handler"] = TimeoutWSGIRequestHandler + + app.run.assert_called_with(**called_kwargs) @pytest.mark.usefixtures("prediction_server") From 6b41abcbba5f67281a3b22118a2016918caf4abd Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov <86368350+s-gavrenkov@users.noreply.github.com> Date: Fri, 29 Aug 2025 00:20:02 -0400 Subject: [PATCH 22/36] [RAPTOR-14353] Add nim watchdog (#1632) * add watchdog * 5 attempts * watchdog_additional * fixed version * add env USE_NIM_WATCHDOG * add env USE_NIM_WATCHDOG * fixed lint * fixed version * rever docker changes * add changes * removed changelogs * max_attempts = 3 * replaced with os.kill(pid, signal.SIGTERM) * renamed * fixed comments * add NIM_WATCHDOG_REQUEST_TIMEOUT and NIM_WATCHDOG_MAX_ATTEMPTS * add logs * fix lint --- .../drum/root_predictors/prediction_server.py | 110 ++++++++++++++++++ 1 file changed, 110 
insertions(+) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 8ca14f063..9e2bbbaf6 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -7,7 +7,11 @@ import logging import os import sys +import time from pathlib import Path +from threading import Thread +import subprocess +import signal import requests from flask import Response, jsonify, request @@ -26,6 +30,7 @@ ModelInfoKeys, RunLanguage, TargetType, + URL_PREFIX_ENV_VAR_NAME, ) from datarobot_drum.drum.exceptions import DrumCommonException from datarobot_drum.drum.model_metadata import read_model_metadata_yaml @@ -82,6 +87,7 @@ def __init__(self, params: dict, app=None): "run_predictor_total", "finish", StatsOperation.SUB, "start" ) self._predictor = self._setup_predictor() + self._server_watchdog = None def _setup_predictor(self): if self._run_language == RunLanguage.PYTHON: @@ -326,6 +332,18 @@ def _run_flask_app(self, app): if self.app: pass else: + if RuntimeParameters.has("USE_NIM_WATCHDOG") and str( + RuntimeParameters.get("USE_NIM_WATCHDOG") + ).lower() in ["true", "1", "yes"]: + # Start the watchdog thread before running the app + self._server_watchdog = Thread( + target=self.watchdog, + args=(port,), + daemon=True, + name="NIM Sidecar Watchdog", + ) + self._server_watchdog.start() + # Configure the server with timeout settings app.run( host=host, @@ -341,6 +359,98 @@ def _run_flask_app(self, app): except OSError as e: raise DrumCommonException("{}: host: {}; port: {}".format(e, host, port)) + def _kill_all_processes(self): + """ + Forcefully terminates all running processes related to the server. + Attempts a clean termination first, then uses system commands to kill remaining processes. + Logs errors encountered during termination. 
+ """ + + logger.error("All health check attempts failed. Forcefully killing all processes.") + + # First try clean termination + try: + self._terminate() + except Exception as e: + logger.error(f"Error during clean termination: {str(e)}") + + # Use more direct system commands to kill processes + try: + # Kill package jobs first (more aggressive approach) + logger.info("Killing Python package jobs") + # Run `busybox ps` and capture output + result = subprocess.run(["busybox", "ps"], capture_output=True, text=True) + # Parse lines, skip the header + lines = result.stdout.strip().split("\n")[1:] + # Extract the PID (first column) + pids = [int(line.split()[0]) for line in lines] + for pid in pids: + print("Killing pid:", pid) + os.kill(pid, signal.SIGTERM) + except Exception as kill_error: + logger.error(f"Error during process killing: {str(kill_error)}") + + def watchdog(self, port): + """ + Watchdog thread that periodically checks if the server is alive by making + GET requests to the /info/ endpoint. Makes 3 attempts (by default) with quadratic backoff + before terminating the Flask app. 
+ """ + + logger.info("Starting watchdog to monitor server health...") + + import os + + url_host = os.environ.get("TEST_URL_HOST", "localhost") + url_prefix = os.environ.get(URL_PREFIX_ENV_VAR_NAME, "") + health_url = f"http://{url_host}:{port}{url_prefix}/info/" + + request_timeout = 120 + if RuntimeParameters.has("NIM_WATCHDOG_REQUEST_TIMEOUT"): + try: + request_timeout = int(RuntimeParameters.get("NIM_WATCHDOG_REQUEST_TIMEOUT")) + except ValueError: + logger.warning( + "Invalid value for NIM_WATCHDOG_REQUEST_TIMEOUT, using default of 120 seconds" + ) + logger.info("Nim watchdog health check request timeout is %s", request_timeout) + check_interval = 10 # seconds + max_attempts = 3 + if RuntimeParameters.has("NIM_WATCHDOG_MAX_ATTEMPTS"): + try: + max_attempts = int(RuntimeParameters.get("NIM_WATCHDOG_MAX_ATTEMPTS")) + except ValueError: + logger.warning("Invalid value for NIM_WATCHDOG_MAX_ATTEMPTS, using default of 3") + logger.info("Nim watchdog max attempts: %s", max_attempts) + attempt = 0 + base_sleep_time = 4 + + while True: + try: + # Check if server is responding to health checks + logger.debug(f"Server health check") + response = requests.get(health_url, timeout=request_timeout) + logger.debug(f"Server health check status: {response.status_code}") + # Connection succeeded, reset attempts and wait for next check + attempt = 0 + time.sleep(check_interval) # Regular check interval + continue + + except Exception as e: + attempt += 1 + logger.warning(f"health_url {health_url}") + logger.warning( + f"Server health check failed (attempt {attempt}/{max_attempts}): {str(e)}" + ) + + if attempt >= max_attempts: + self._kill_all_processes() + + # Quadratic backoff + sleep_time = base_sleep_time * (attempt**2) + logger.info(f"Retrying in {sleep_time} seconds...") + time.sleep(sleep_time) + def terminate(self): terminate_op = getattr(self._predictor, "terminate", None) if callable(terminate_op): From eef913ed97f94302f1a3d422c202986a72ac1a41 Mon Sep 17 00:00:00 2001 
From: Sergey Gavrenkov <86368350+s-gavrenkov@users.noreply.github.com> Date: Fri, 29 Aug 2025 14:48:01 -0400 Subject: [PATCH 23/36] updated version (#1643) --- custom_model_runner/CHANGELOG.md | 4 ++++ custom_model_runner/datarobot_drum/drum/description.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/custom_model_runner/CHANGELOG.md b/custom_model_runner/CHANGELOG.md index 7676659c7..cb8993d0a 100644 --- a/custom_model_runner/CHANGELOG.md +++ b/custom_model_runner/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +#### [1.16.25] - 2025-08-29 +##### Changed +- Added request timeouts and created a NIM watchdog. + #### [1.16.24] - 2025-08-25 ##### Changed - Fixed issue with OTEL logs infinite recursion. diff --git a/custom_model_runner/datarobot_drum/drum/description.py b/custom_model_runner/datarobot_drum/drum/description.py index 83a754e6c..8a88317f6 100644 --- a/custom_model_runner/datarobot_drum/drum/description.py +++ b/custom_model_runner/datarobot_drum/drum/description.py @@ -4,6 +4,6 @@ This is proprietary source code of DataRobot, Inc. and its affiliates. Released under the terms of DataRobot Tool and Utility Agreement. 
""" -version = "1.16.24" +version = "1.16.25" __version__ = version project_name = "datarobot-drum" From b0bd728d9d795469476c5750606ec909e6cfbfd0 Mon Sep 17 00:00:00 2001 From: svc-harness-git2 Date: Sat, 30 Aug 2025 02:49:20 +0000 Subject: [PATCH 24/36] Reconcile dependencies, updated IDs, tags --- .../python311_genai_agents/env_info.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 7acc25582..ed06db2fd 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. 
You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689e166376dbcf1206bb5ce4", + "environmentVersionId": "68b2666df0c65f1204faa9a4", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311_genai_agents", "imageRepository": "env-python-genai-agents", "tags": [ - "v11.2.0-689e166376dbcf1206bb5ce4", - "689e166376dbcf1206bb5ce4", + "v11.2.0-68b2666df0c65f1204faa9a4", + "68b2666df0c65f1204faa9a4", "v11.2.0-latest" ] } From 55c8763b777921cbf8efa939e07969f530f82a71 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:06:18 -0400 Subject: [PATCH 25/36] working --- .../datarobot_drum/drum/drum.py | 39 +++++++++++++------ .../datarobot_drum/drum/main.py | 21 ++++++++-- .../datarobot_drum/drum/runtime.py | 1 - .../datarobot_drum/drum/server.py | 4 +- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index 5bb5d0960..eec731a50 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -77,9 +77,10 @@ class CMRunner: - def __init__(self, runtime, app=None): + def __init__(self, runtime, app=None, worker_ctx=None): self.runtime = runtime self.app = app + self.worker_ctx = worker_ctx self.options = runtime.options self.options.model_config = read_model_metadata_yaml(self.options.code_dir) self.options.default_parameter_values = ( @@ -498,8 +499,11 @@ def run(self): with self._setup_output_if_not_exists(): self._run_predictions(stats_collector) finally: - if stats_collector: - stats_collector.disable() + if self.worker_ctx: + self.worker_ctx.defer_cleanup(lambda: stats_collector.disable(), desc="stats_collector.disable()") + else: + if stats_collector: + 
stats_collector.disable() if stats_collector: stats_collector.print_reports() elif self.run_mode == RunMode.SERVER: @@ -837,16 +841,27 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): if stats_collector: stats_collector.mark("run") finally: - if predictor is not None: - predictor.terminate() - if stats_collector: - stats_collector.mark("end") + if self.worker_ctx: + if predictor is not None: + self.worker_ctx.defer_cleanup(lambda: predictor.terminate(), desc="predictor.terminate()") + if stats_collector: + self.worker_ctx.defer_cleanup(lambda: stats_collector.mark("end"), desc="stats_collector.mark('end')") + self.worker_ctx.defer_cleanup(lambda: self.logger.info( + "<<< Finish {} in the {} mode".format( + ArgumentsOptions.MAIN_COMMAND, self.run_mode.value + ) + ), desc="logger.info(...)") - self.logger.info( - "<<< Finish {} in the {} mode".format( - ArgumentsOptions.MAIN_COMMAND, self.run_mode.value - ) - ) + else: + if predictor is not None: + predictor.terminate() + if stats_collector: + stats_collector.mark("end") + self.logger.info( + "<<< Finish {} in the {} mode".format( + ArgumentsOptions.MAIN_COMMAND, self.run_mode.value + ) + ) @contextlib.contextmanager def _setup_output_if_not_exists(self): diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 379e6914e..cd444203e 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -54,10 +54,25 @@ ) -def main(app): +def main(app=None, worker_ctx=None): with DrumRuntime(app) as runtime: config_logging() + if worker_ctx: + if runtime.options and RunMode(runtime.options.subparser_name) == RunMode.SERVER: + if runtime.cm_runner: + worker_ctx.defer_cleanup(lambda: runtime.cm_runner.terminate(), desc="runtime.cm_runner.terminate()") + # Let traceer offload accumulated spans before shutdown. 
+ if runtime.trace_provider is not None: + worker_ctx.defer_cleanup(lambda: runtime.trace_provider.shutdown(), desc="runtime.trace_provider.shutdown()") + if runtime.metric_provider is not None: + worker_ctx.defer_cleanup(lambda: runtime.metric_provider.shutdown(), + desc="runtime.metric_provider.shutdown()") + if runtime.log_provider is not None: + worker_ctx.defer_cleanup(lambda: runtime.log_provider.shutdown(), + desc="runtime.log_provider.shutdown()") + #os._exit(130) + def signal_handler(sig, frame): # The signal is assigned so the stacktrace is not presented when Ctrl-C is pressed. # The cleanup itself is done only if we are NOT running in performance test mode which @@ -95,11 +110,11 @@ def signal_handler(sig, frame): from datarobot_drum.drum.drum import CMRunner try: - runtime.cm_runner = CMRunner(runtime, app) + runtime.cm_runner = CMRunner(runtime, app, worker_ctx) runtime.cm_runner.run() except DrumSchemaValidationException: sys.exit(ExitCodes.SCHEMA_VALIDATION_ERROR.value) if __name__ == "__main__": - pass + main() diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index 0ecd72fb8..46b3f020d 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -109,7 +109,6 @@ def predict(): @model_api.route("/transform/", methods=["POST"]) def transform(): return {"message": "ERROR: {}".format(exc_value)}, HTTP_513_DRUM_PIPELINE_ERROR - print(f"rrrrrrrr{host}, {port}") if app: pass else: diff --git a/custom_model_runner/datarobot_drum/drum/server.py b/custom_model_runner/datarobot_drum/drum/server.py index 029271aa3..744702dee 100644 --- a/custom_model_runner/datarobot_drum/drum/server.py +++ b/custom_model_runner/datarobot_drum/drum/server.py @@ -29,7 +29,9 @@ logger = get_drum_logger(LOGGER_NAME_PREFIX) -def get_flask_app(api_blueprint, app): +def get_flask_app(api_blueprint, app=None): + if app is None: + app = create_flask_app() 
url_prefix = os.environ.get(URL_PREFIX_ENV_VAR_NAME, "") app.register_blueprint(api_blueprint, url_prefix=url_prefix) return app From a690532918d05b7a086b16d184f8fba7e60da6c6 Mon Sep 17 00:00:00 2001 From: svc-harness-git2 Date: Mon, 1 Sep 2025 04:13:18 +0000 Subject: [PATCH 26/36] Reconcile dependencies, updated IDs, tags --- .../python311_genai_agents/env_info.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 7acc25582..43c46a6b6 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. 
You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689e166376dbcf1206bb5ce4", + "environmentVersionId": "68b51d07d61ffa11f442b02e", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311_genai_agents", "imageRepository": "env-python-genai-agents", "tags": [ - "v11.2.0-689e166376dbcf1206bb5ce4", - "689e166376dbcf1206bb5ce4", + "v11.2.0-68b51d07d61ffa11f442b02e", + "68b51d07d61ffa11f442b02e", "v11.2.0-latest" ] } From 170a04ceaa616dc056ba9749539ad1b2d9a38f27 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:44:44 -0400 Subject: [PATCH 27/36] refactoring --- .../datarobot_drum/drum/drum.py | 10 ++++---- .../datarobot_drum/drum/main.py | 23 +++++++++++++++---- .../drum/root_predictors/drum_inline_utils.py | 4 ++-- .../drum/root_predictors/prediction_server.py | 8 +++---- .../datarobot_drum/drum/runtime.py | 15 +++++++----- .../datarobot_drum/drum/server.py | 4 +++- .../python311_genai_agents/run_agent.py | 3 --- 7 files changed, 42 insertions(+), 25 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index eec731a50..96a7ea7bb 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -77,10 +77,10 @@ class CMRunner: - def __init__(self, runtime, app=None, worker_ctx=None): + def __init__(self, runtime, flask_app=None, worker_ctx=None): self.runtime = runtime - self.app = app - self.worker_ctx = worker_ctx + self.flask_app = flask_app # This is the Flask app object, used when running the application via CLI + self.worker_ctx = worker_ctx # This is the Gunicorn worker context object (WorkerCtx) self.options = runtime.options self.options.model_config = 
read_model_metadata_yaml(self.options.code_dir) self.options.default_parameter_values = ( @@ -500,6 +500,7 @@ def run(self): self._run_predictions(stats_collector) finally: if self.worker_ctx: + # Add cleanup when running via the command line (gunicorn worker) self.worker_ctx.defer_cleanup(lambda: stats_collector.disable(), desc="stats_collector.disable()") else: if stats_collector: @@ -831,7 +832,7 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): if stats_collector: stats_collector.mark("start") predictor = ( - PredictionServer(params, self.app) + PredictionServer(params, self.flask_app) if self.run_mode == RunMode.SERVER else GenericPredictorComponent(params) ) @@ -842,6 +843,7 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): stats_collector.mark("run") finally: if self.worker_ctx: + # Add cleanup when running via the command line (gunicorn worker) if predictor is not None: self.worker_ctx.defer_cleanup(lambda: predictor.terminate(), desc="predictor.terminate()") if stats_collector: diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index cd444203e..27bd4bc0f 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -54,15 +54,29 @@ ) -def main(app=None, worker_ctx=None): - with DrumRuntime(app) as runtime: +def main(flask_app=None, worker_ctx=None): + """ + The main entry point for the custom model runner. + + This function initializes the runtime environment, sets up logging, handles + signal interruptions, and starts the CMRunner for executing user-defined models. + + Args: + flask_app: Optional[Flask] Flask application instance, used when running using command line. + worker_ctx: Optional gunicorn worker context (WorkerCtx), used for managing cleanup tasks in a + multi-worker setup (e.g., Gunicorn). 
+ + Returns: + None + """ + with DrumRuntime(flask_app) as runtime: config_logging() if worker_ctx: + # Add cleanup when running via the command line (gunicorn worker) if runtime.options and RunMode(runtime.options.subparser_name) == RunMode.SERVER: if runtime.cm_runner: worker_ctx.defer_cleanup(lambda: runtime.cm_runner.terminate(), desc="runtime.cm_runner.terminate()") - # Let traceer offload accumulated spans before shutdown. if runtime.trace_provider is not None: worker_ctx.defer_cleanup(lambda: runtime.trace_provider.shutdown(), desc="runtime.trace_provider.shutdown()") if runtime.metric_provider is not None: @@ -71,7 +85,6 @@ def main(app=None, worker_ctx=None): if runtime.log_provider is not None: worker_ctx.defer_cleanup(lambda: runtime.log_provider.shutdown(), desc="runtime.log_provider.shutdown()") - #os._exit(130) def signal_handler(sig, frame): # The signal is assigned so the stacktrace is not presented when Ctrl-C is pressed. @@ -110,7 +123,7 @@ def signal_handler(sig, frame): from datarobot_drum.drum.drum import CMRunner try: - runtime.cm_runner = CMRunner(runtime, app, worker_ctx) + runtime.cm_runner = CMRunner(runtime, flask_app, worker_ctx) runtime.cm_runner.run() except DrumSchemaValidationException: sys.exit(ExitCodes.SCHEMA_VALIDATION_ERROR.value) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py index 1837e72af..19e9ed005 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/drum_inline_utils.py @@ -37,7 +37,7 @@ @contextlib.contextmanager def drum_inline_predictor( - target_type: str, custom_model_dir: str, target_name: str, app, *cmd_args: List[str] + target_type: str, custom_model_dir: str, target_name: str, *cmd_args: List[str] ) -> Generator[BaseLanguagePredictor, None, None]: """ Drum run for a custom model code definition. 
Yields a predictor, ready to work with. @@ -49,7 +49,7 @@ def drum_inline_predictor( :param cmd_args: Extra command line arguments :return: """ - with DrumRuntime(app) as runtime, tempfile.NamedTemporaryFile(mode="wb") as tf: + with DrumRuntime() as runtime, tempfile.NamedTemporaryFile(mode="wb") as tf: # setup os.environ["TARGET_NAME"] = target_name diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 3812fdb67..559ca1187 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -68,9 +68,9 @@ class TimeoutWSGIRequestHandler(WSGIRequestHandler): class PredictionServer(PredictMixin): - def __init__(self, params: dict, app=None): + def __init__(self, params: dict, flask_app=None): self._params = params - self.app = app + self.flask_app = flask_app self._show_perf = self._params.get("show_perf") self._resource_monitor = ResourceMonitor(monitor_current_process=True) self._run_language = RunLanguage(params.get("run_language")) @@ -311,7 +311,7 @@ def handle_exception(e): cli = sys.modules["flask.cli"] cli.show_server_banner = lambda *x: None - app = get_flask_app(model_api, self.app) + app = get_flask_app(model_api, self.flask_app) self.load_flask_extensions(app) self._run_flask_app(app) @@ -329,7 +329,7 @@ def _run_flask_app(self, app): processes = self._params.get("processes") logger.info("Number of webserver processes: %s", processes) try: - if self.app: + if self.flask_app: pass else: if RuntimeParameters.has("USE_NIM_WATCHDOG") and str( diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index 46b3f020d..38137c3ed 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -5,6 +5,7 @@ Released under 
the terms of DataRobot Tool and Utility Agreement. """ import logging +from typing import Optional from datarobot_drum.drum.server import ( empty_api_blueprint, @@ -13,6 +14,7 @@ ) from datarobot_drum.drum.common import verbose_stdout, get_drum_logger from datarobot_drum.drum.enum import LOGGER_NAME_PREFIX, RunMode +from flask import Flask from datarobot_drum.drum.exceptions import DrumCommonException @@ -23,7 +25,7 @@ class DrumRuntime: - def __init__(self, app): + def __init__(self, flask_app: Optional[Flask] = None): self.initialization_succeeded = False self.options = None self.cm_runner = None @@ -31,7 +33,7 @@ def __init__(self, app): self.trace_provider = None self.metric_provider = None self.log_provider = None - self.app = app + self.flask_app = flask_app def __enter__(self): return self @@ -84,12 +86,12 @@ def __exit__(self, exc_type, exc_value, exc_traceback): port = int(host_port_list[1]) if len(host_port_list) == 2 else None with verbose_stdout(self.options.verbose): - run_error_server(host, port, exc_value, self.app) + run_error_server(host, port, exc_value, self.flask_app) return False # propagate exception further -def run_error_server(host, port, exc_value, app): +def run_error_server(host, port, exc_value, flask_app:Optional[Flask]=None): model_api = empty_api_blueprint() @model_api.route("/", methods=["GET"]) @@ -109,8 +111,9 @@ def predict(): @model_api.route("/transform/", methods=["POST"]) def transform(): return {"message": "ERROR: {}".format(exc_value)}, HTTP_513_DRUM_PIPELINE_ERROR - if app: + + app = get_flask_app(model_api, flask_app) + if flask_app: pass else: - app = get_flask_app(model_api, app) app.run(host, port) diff --git a/custom_model_runner/datarobot_drum/drum/server.py b/custom_model_runner/datarobot_drum/drum/server.py index 744702dee..2978d45c6 100644 --- a/custom_model_runner/datarobot_drum/drum/server.py +++ b/custom_model_runner/datarobot_drum/drum/server.py @@ -5,6 +5,8 @@ Released under the terms of DataRobot Tool and 
Utility Agreement. """ import datetime +from typing import Optional + import flask import os import uuid @@ -29,7 +31,7 @@ logger = get_drum_logger(LOGGER_NAME_PREFIX) -def get_flask_app(api_blueprint, app=None): +def get_flask_app(api_blueprint, app:Optional[Flask]=None): if app is None: app = create_flask_app() url_prefix = os.environ.get(URL_PREFIX_ENV_VAR_NAME, "") diff --git a/public_dropin_environments/python311_genai_agents/run_agent.py b/public_dropin_environments/python311_genai_agents/run_agent.py index cf99d2828..a645e22b6 100644 --- a/public_dropin_environments/python311_genai_agents/run_agent.py +++ b/public_dropin_environments/python311_genai_agents/run_agent.py @@ -255,7 +255,6 @@ def execute_drum( def execute_drum_inline( chat_completion: CompletionCreateParamsBase, custom_model_dir: Path, - app ) -> ChatCompletion: root.info("Executing agent as [chat] endpoint. DRUM Inline Executor.") @@ -264,7 +263,6 @@ def execute_drum_inline( target_type=TargetType.AGENTIC_WORKFLOW.value, custom_model_dir=custom_model_dir, target_name="response", - app ) as predictor: root.info("Executing Agent") completion = predictor.chat(chat_completion) @@ -310,7 +308,6 @@ def run_agent_procedure(args: Any) -> None: result = execute_drum_inline( chat_completion=chat_completion, custom_model_dir=args.custom_model_dir, - app, ) else: result = execute_drum( From 3b1967c265478bbca9685692ef3ed4ef751e97a3 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:46:32 -0400 Subject: [PATCH 28/36] revert nim sidecar --- .../nim_sidecar/Dockerfile | 8 +- .../nim_sidecar/app.py | 14 -- .../nim_sidecar/env_info.json | 9 +- .../nim_sidecar/requirements.in | 2 - .../nim_sidecar/requirements.txt | 154 +++++++++--------- .../nim_sidecar/start_server.sh | 5 +- 6 files changed, 81 insertions(+), 111 deletions(-) delete mode 100644 public_dropin_nim_environments/nim_sidecar/app.py diff --git a/public_dropin_nim_environments/nim_sidecar/Dockerfile 
b/public_dropin_nim_environments/nim_sidecar/Dockerfile index 16774a254..75a314df3 100644 --- a/public_dropin_nim_environments/nim_sidecar/Dockerfile +++ b/public_dropin_nim_environments/nim_sidecar/Dockerfile @@ -1,6 +1,6 @@ # This is a private chain-guard development image that is stored in DataRobot's private registry. # Replace it with your own development chain-gaurd image if you build your own. -FROM datarobotdev/mirror_chainguard_datarobot.com_python-fips:3.11-dev AS build +FROM datarobotdev/mirror_chainguard_datarobot.com_python-fips:3.11-dev as build ENV VIRTUAL_ENV=/opt/venv USER root @@ -9,8 +9,7 @@ RUN python -m venv ${VIRTUAL_ENV} && \ COPY requirements.txt requirements.txt RUN ${VIRTUAL_ENV}/bin/python -m pip install -r requirements.txt -COPY datarobot_drum-1.16.21-py3-none-any.whl /tmp/ -RUN ${VIRTUAL_ENV}/bin/python -m pip install /tmp/datarobot_drum-1.16.21-py3-none-any.whl + # This is a private production chain-guard image that is stored in DataRobot's private registry. # Replace it with your own production chain-gaurd image if you build your own. 
@@ -46,9 +45,6 @@ ENV CODE_DIR=/opt/code ENV ADDRESS=0.0.0.0:8080 ENV WITH_ERROR_SERVER=1 -RUN mkdir -p ${CODE_DIR} \ - && chmod a+rwX ${CODE_DIR} - # This makes print statements show up in the logs API ENV PYTHONUNBUFFERED=1 diff --git a/public_dropin_nim_environments/nim_sidecar/app.py b/public_dropin_nim_environments/nim_sidecar/app.py deleted file mode 100644 index 3de0b2ce2..000000000 --- a/public_dropin_nim_environments/nim_sidecar/app.py +++ /dev/null @@ -1,14 +0,0 @@ -# Import DRUM's WSGI application - -import sys -from datarobot_drum.drum.main import main -from datarobot_drum.drum.server import create_flask_app - -sys.argv = [ - "drum","server", # Program name - "--sidecar","--gpu-predictor=nim", "--logging-level=info" -] - -app = create_flask_app() -main(app) - diff --git a/public_dropin_nim_environments/nim_sidecar/env_info.json b/public_dropin_nim_environments/nim_sidecar/env_info.json index a466cea9c..5b7319fd1 100644 --- a/public_dropin_nim_environments/nim_sidecar/env_info.json +++ b/public_dropin_nim_environments/nim_sidecar/env_info.json @@ -4,18 +4,17 @@ "description": "", "programmingLanguage": "python", "label": "", - "environmentVersionId": "6848a84e0c7c49131250ffd4", + "environmentVersionId": "6848a84e0c7c49131250fdd4", "environmentVersionDescription": "Run with 10 HTTP workers by default", "isPublic": true, - "isDownloadable": true, "useCases": [ "customModel" ], "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_nim_environments/nim_sidecar", "imageRepository": "env-nim-sidecar", "tags": [ - "v11.2.0-6848a84e0c7c49131250ffd4", - "6848a84e0c7c49131250ffd4", - "v11.2.0-latest" + "v11.1.0-6848b6572081a81ac56c7a0b", + "6848b6572081a81ac56c7a0b", + "v11.1.0-latest" ] } diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.in b/public_dropin_nim_environments/nim_sidecar/requirements.in index 309a23033..598d987b5 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.in +++ 
b/public_dropin_nim_environments/nim_sidecar/requirements.in @@ -3,6 +3,4 @@ datarobot-mlops datarobot-mlops-connected-client datarobot-drum openai>=1.17.0 -gunicorn>=23.0.0 -gevent>=25.5.1 opentelemetry-instrumentation-openai diff --git a/public_dropin_nim_environments/nim_sidecar/requirements.txt b/public_dropin_nim_environments/nim_sidecar/requirements.txt index bfdbd4786..71133d457 100644 --- a/public_dropin_nim_environments/nim_sidecar/requirements.txt +++ b/public_dropin_nim_environments/nim_sidecar/requirements.txt @@ -4,121 +4,113 @@ # # pip-compile --index-url=https://pypi.org/simple --no-annotate --no-emit-index-url --no-emit-trusted-host --output-file=requirements.txt requirements.in # -aiohappyeyeballs==2.6.1 -aiohttp==3.12.15 -aiosignal==1.4.0 +aiohappyeyeballs==2.4.6 +aiohttp==3.11.13 +aiosignal==1.3.2 annotated-types==0.7.0 -anyio==4.10.0 -argcomplete==3.6.2 +anyio==4.8.0 +argcomplete==3.5.3 async-timeout==5.0.1 -attrs==25.3.0 -azure-core==1.35.0 -azure-identity==1.24.0 +attrs==25.1.0 +azure-core==1.32.0 +azure-identity==1.20.0 azure-storage-blob==12.19.0 +backoff==2.2.1 blinker==1.9.0 -boto3==1.40.13 -botocore==1.40.13 -cachetools==5.5.2 -certifi==2025.8.3 +boto3==1.37.1 +botocore==1.37.1 +cachetools==4.2.4 +certifi==2025.1.31 cffi==1.17.1 -charset-normalizer==3.4.3 -click==8.2.1 -cryptography==45.0.6 -datarobot==3.8.2 -datarobot-drum==1.16.23 -datarobot-mlops==11.1.0 -datarobot-mlops-connected-client==11.1.0 -datarobot-storage==2.2.0 +charset-normalizer==3.4.1 +click==8.1.8 +cryptography==44.0.1 +datarobot==3.6.3 +datarobot-drum==1.16.17 +datarobot-mlops==11.1.0a3 +datarobot-mlops-connected-client==11.1.0a3 +datarobot-storage==0.0.0 +deprecated==1.2.18 distro==1.9.0 docker==7.1.0 -exceptiongroup==1.3.0 +exceptiongroup==1.2.2 filechunkio==1.8 -flask==3.1.2 -frozenlist==1.7.0 -gevent==25.5.1 -google-api-core==2.25.1 -google-auth==2.40.3 -google-cloud-core==2.4.3 -google-cloud-storage==2.19.0 -google-crc32c==1.7.1 +flask==3.1.0 +frozenlist==1.5.0 
+google-api-core==1.34.0 +google-auth==1.28.1 +google-cloud-core==2.4.2 +google-cloud-storage==1.43.0 +google-crc32c==1.6.0 google-resumable-media==2.7.2 -googleapis-common-protos==1.70.0 -greenlet==3.2.4 -gunicorn==23.0.0 -h11==0.16.0 -httpcore==1.0.9 +googleapis-common-protos==1.68.0 +h11==0.14.0 +httpcore==1.0.7 httpx==0.28.1 idna==3.10 -importlib-metadata==8.7.0 isodate==0.7.2 itsdangerous==2.2.0 -jinja2==3.1.6 -jiter==0.10.0 +jinja2==3.1.5 +jiter==0.8.2 jmespath==1.0.1 julia==0.5.7 markupsafe==3.0.2 memory-profiler==0.61.0 -msal==1.33.0 -msal-extensions==1.3.1 -multidict==6.6.4 -mypy-extensions==1.1.0 -numpy==2.2.6 -openai==1.100.2 -opentelemetry-api==1.36.0 -opentelemetry-exporter-otlp-proto-common==1.36.0 -opentelemetry-exporter-otlp-proto-http==1.36.0 -opentelemetry-instrumentation==0.57b0 -opentelemetry-instrumentation-aiohttp-client==0.57b0 -opentelemetry-instrumentation-openai==0.45.6 -opentelemetry-instrumentation-requests==0.57b0 -opentelemetry-proto==1.36.0 -opentelemetry-sdk==1.36.0 -opentelemetry-semantic-conventions==0.57b0 -opentelemetry-semantic-conventions-ai==0.4.12 -opentelemetry-util-http==0.57b0 -orjson==3.11.2 -packaging==25.0 -pandas==2.3.1 -pillow==11.3.0 -progress==1.6.1 -propcache==0.3.2 -proto-plus==1.26.1 -protobuf==6.32.0 +msal==1.31.1 +msal-extensions==1.2.0 +multidict==6.1.0 +mypy-extensions==1.0.0 +numpy==2.0.2 +openai==1.64.0 +opentelemetry-api==1.16.0 +opentelemetry-exporter-otlp-proto-http==1.16.0 +opentelemetry-instrumentation-aiohttp-client==0.37b0 +opentelemetry-instrumentation-openai==0.37b0 +opentelemetry-instrumentation-requests==0.37b0 +opentelemetry-instrumentation==0.37b0 +opentelemetry-proto==1.16.0 +opentelemetry-sdk==1.16.0 +opentelemetry-semantic-conventions==0.37b0 +opentelemetry-util-http==0.37b0 +orjson==3.10.15 +packaging==24.2 +pandas==2.2.3 +pillow==11.1.0 +portalocker==2.10.1 +progress==1.6 +propcache==0.3.0 +protobuf==3.20.3 psutil==7.0.0 py4j==0.10.9.9 pyasn1==0.6.1 -pyasn1-modules==0.4.2 
+pyasn1-modules==0.4.1 pycparser==2.22 -pydantic==2.11.7 -pydantic-core==2.33.2 +pydantic==2.10.6 +pydantic-core==2.27.2 pyjwt[crypto]==2.10.1 python-dateutil==2.9.0.post0 -pytz==2025.2 +pytz==2025.1 pyyaml==6.0.2 -requests==2.32.5 +requests==2.32.3 requests-toolbelt==1.0.0 -rsa==4.9.1 +rsa==4.9 ruamel-yaml==0.17.4 -s3transfer==0.13.1 -scipy==1.15.3 +s3transfer==0.11.2 +scipy==1.13.1 six==1.17.0 sniffio==1.3.1 strenum==0.4.15 strictyaml==1.4.2 -termcolor==3.1.0 +termcolor==2.5.0 texttable==1.7.0 tqdm==4.67.1 trafaret==2.1.1 -typing-extensions==4.14.1 -typing-inspection==0.4.1 -tzdata==2025.2 -urllib3==2.5.0 +typing-extensions==4.12.2 +tzdata==2025.1 +urllib3==1.26.20 werkzeug==3.1.3 -wrapt==1.17.3 -yarl==1.20.1 -zipp==3.23.0 -zope-event==5.1.1 -zope-interface==7.2 +wrapt==1.17.2 +yarl==1.18.3 # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/public_dropin_nim_environments/nim_sidecar/start_server.sh b/public_dropin_nim_environments/nim_sidecar/start_server.sh index 8f92362db..82a2ffa69 100755 --- a/public_dropin_nim_environments/nim_sidecar/start_server.sh +++ b/public_dropin_nim_environments/nim_sidecar/start_server.sh @@ -8,6 +8,7 @@ echo "Starting Custom Model environment with NIM" set -e + if [ "${ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP}" = 1 ]; then echo "Environment variables:" env @@ -16,6 +17,4 @@ fi echo echo "Starting DRUM server..." 
echo - -exec gunicorn app:app --worker-class gevent --workers=8 --bind=0.0.0.0:8080 --backlog=512 --timeout 120 --max-requests 700 --max-requests-jitter 400 --log-level=info --access-logfile - --access-logformat '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'& #works -wait \ No newline at end of file +exec drum server --sidecar --gpu-predictor=nim --logging-level=info "$@" From c625d6dde5c15b62410ec07d7d065b58755073a7 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:51:58 -0400 Subject: [PATCH 29/36] revrt env_info.json --- .../python311_genai_agents/env_info.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 43c46a6b6..838c31b3e 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. 
You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "68b51d07d61ffa11f442b02e", + "environmentVersionId": "689e166376dbcf1206bb5ce4", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, @@ -15,8 +15,8 @@ "contextUrl": "https://github.com/datarobot/datarobot-user-models/tree/master/public_dropin_environments/python311_genai_agents", "imageRepository": "env-python-genai-agents", "tags": [ - "v11.2.0-68b51d07d61ffa11f442b02e", - "68b51d07d61ffa11f442b02e", + "v11.2.0-689e166376dbcf1206bb5ce4", + "689e166376dbcf1206bb5ce4", "v11.2.0-latest" ] } From 4ef3541f775dbde49a56941fdd54bc262f15944f Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:52:18 -0400 Subject: [PATCH 30/36] delint --- .../datarobot_drum/drum/drum.py | 29 +++++++++++++------ .../datarobot_drum/drum/main.py | 20 +++++++++---- .../datarobot_drum/drum/runtime.py | 2 +- .../datarobot_drum/drum/server.py | 2 +- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index 96a7ea7bb..4fab14fb9 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -79,8 +79,10 @@ class CMRunner: def __init__(self, runtime, flask_app=None, worker_ctx=None): self.runtime = runtime - self.flask_app = flask_app # This is the Flask app object, used when running the application via CLI - self.worker_ctx = worker_ctx # This is the Gunicorn worker context object (WorkerCtx) + self.flask_app = ( + flask_app # This is the Flask app object, used when running the application via CLI + ) + self.worker_ctx = worker_ctx # This is the Gunicorn worker context object (WorkerCtx) self.options = runtime.options self.options.model_config = read_model_metadata_yaml(self.options.code_dir) self.options.default_parameter_values = ( @@ -501,7 +503,9 @@ def run(self): 
finally: if self.worker_ctx: # Add cleanup when running via the command line (gunicorn worker) - self.worker_ctx.defer_cleanup(lambda: stats_collector.disable(), desc="stats_collector.disable()") + self.worker_ctx.defer_cleanup( + lambda: stats_collector.disable(), desc="stats_collector.disable()" + ) else: if stats_collector: stats_collector.disable() @@ -845,14 +849,21 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): if self.worker_ctx: # Add cleanup when running via the command line (gunicorn worker) if predictor is not None: - self.worker_ctx.defer_cleanup(lambda: predictor.terminate(), desc="predictor.terminate()") + self.worker_ctx.defer_cleanup( + lambda: predictor.terminate(), desc="predictor.terminate()" + ) if stats_collector: - self.worker_ctx.defer_cleanup(lambda: stats_collector.mark("end"), desc="stats_collector.mark('end')") - self.worker_ctx.defer_cleanup(lambda: self.logger.info( - "<<< Finish {} in the {} mode".format( - ArgumentsOptions.MAIN_COMMAND, self.run_mode.value + self.worker_ctx.defer_cleanup( + lambda: stats_collector.mark("end"), desc="stats_collector.mark('end')" ) - ), desc="logger.info(...)") + self.worker_ctx.defer_cleanup( + lambda: self.logger.info( + "<<< Finish {} in the {} mode".format( + ArgumentsOptions.MAIN_COMMAND, self.run_mode.value + ) + ), + desc="logger.info(...)", + ) else: if predictor is not None: diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 27bd4bc0f..4abeef1d4 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -76,15 +76,23 @@ def main(flask_app=None, worker_ctx=None): # Add cleanup when running via the command line (gunicorn worker) if runtime.options and RunMode(runtime.options.subparser_name) == RunMode.SERVER: if runtime.cm_runner: - worker_ctx.defer_cleanup(lambda: runtime.cm_runner.terminate(), desc="runtime.cm_runner.terminate()") + 
worker_ctx.defer_cleanup( + lambda: runtime.cm_runner.terminate(), desc="runtime.cm_runner.terminate()" + ) if runtime.trace_provider is not None: - worker_ctx.defer_cleanup(lambda: runtime.trace_provider.shutdown(), desc="runtime.trace_provider.shutdown()") + worker_ctx.defer_cleanup( + lambda: runtime.trace_provider.shutdown(), + desc="runtime.trace_provider.shutdown()", + ) if runtime.metric_provider is not None: - worker_ctx.defer_cleanup(lambda: runtime.metric_provider.shutdown(), - desc="runtime.metric_provider.shutdown()") + worker_ctx.defer_cleanup( + lambda: runtime.metric_provider.shutdown(), + desc="runtime.metric_provider.shutdown()", + ) if runtime.log_provider is not None: - worker_ctx.defer_cleanup(lambda: runtime.log_provider.shutdown(), - desc="runtime.log_provider.shutdown()") + worker_ctx.defer_cleanup( + lambda: runtime.log_provider.shutdown(), desc="runtime.log_provider.shutdown()" + ) def signal_handler(sig, frame): # The signal is assigned so the stacktrace is not presented when Ctrl-C is pressed. 
diff --git a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index 38137c3ed..3af3751b4 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -91,7 +91,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): return False # propagate exception further -def run_error_server(host, port, exc_value, flask_app:Optional[Flask]=None): +def run_error_server(host, port, exc_value, flask_app: Optional[Flask] = None): model_api = empty_api_blueprint() @model_api.route("/", methods=["GET"]) diff --git a/custom_model_runner/datarobot_drum/drum/server.py b/custom_model_runner/datarobot_drum/drum/server.py index 2978d45c6..012d39d6b 100644 --- a/custom_model_runner/datarobot_drum/drum/server.py +++ b/custom_model_runner/datarobot_drum/drum/server.py @@ -31,7 +31,7 @@ logger = get_drum_logger(LOGGER_NAME_PREFIX) -def get_flask_app(api_blueprint, app:Optional[Flask]=None): +def get_flask_app(api_blueprint, app: Optional[Flask] = None): if app is None: app = create_flask_app() url_prefix = os.environ.get(URL_PREFIX_ENV_VAR_NAME, "") From 77f6ad2206d30645cf1cca5b4c1d617cd31ba498 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:52:51 -0400 Subject: [PATCH 31/36] revert --- public_dropin_environments/python311_genai_agents/env_info.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public_dropin_environments/python311_genai_agents/env_info.json b/public_dropin_environments/python311_genai_agents/env_info.json index 838c31b3e..7acc25582 100644 --- a/public_dropin_environments/python311_genai_agents/env_info.json +++ b/public_dropin_environments/python311_genai_agents/env_info.json @@ -4,7 +4,7 @@ "description": "This template environment can be used to create GenAI-powered agents using CrewAI, LangGraph, or Llama-Index. 
Similar to other drop-in environments, you can either include a .pth artifact or any other code needed to deserialize your model, and optionally a custom.py file. You can also use this environment in codespaces.", "programmingLanguage": "python", "label": "", - "environmentVersionId": "689e166376dbcf1206bb5ce4", + "environmentVersionId": "689e166376dbcf1206bb5ce4", "environmentVersionDescription": "", "isPublic": true, "isDownloadable": true, From 81eeb910c96db9c3cc0329590dee750f57edd57d Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Mon, 1 Sep 2025 00:59:29 -0400 Subject: [PATCH 32/36] added comments --- .../datarobot_drum/drum/root_predictors/prediction_server.py | 5 ++++- custom_model_runner/datarobot_drum/drum/runtime.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 559ca1187..bd0dd3490 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -70,7 +70,9 @@ class TimeoutWSGIRequestHandler(WSGIRequestHandler): class PredictionServer(PredictMixin): def __init__(self, params: dict, flask_app=None): self._params = params - self.flask_app = flask_app + self.flask_app = ( + flask_app # This is the Flask app object, used when running the application via CLI + ) self._show_perf = self._params.get("show_perf") self._resource_monitor = ResourceMonitor(monitor_current_process=True) self._run_language = RunLanguage(params.get("run_language")) @@ -330,6 +332,7 @@ def _run_flask_app(self, app): logger.info("Number of webserver processes: %s", processes) try: if self.flask_app: + # when running application via the command line (e.g., gunicorn worker) pass else: if RuntimeParameters.has("USE_NIM_WATCHDOG") and str( diff --git 
a/custom_model_runner/datarobot_drum/drum/runtime.py b/custom_model_runner/datarobot_drum/drum/runtime.py index 3af3751b4..ebef0e0a4 100644 --- a/custom_model_runner/datarobot_drum/drum/runtime.py +++ b/custom_model_runner/datarobot_drum/drum/runtime.py @@ -33,7 +33,9 @@ def __init__(self, flask_app: Optional[Flask] = None): self.trace_provider = None self.metric_provider = None self.log_provider = None - self.flask_app = flask_app + self.flask_app = ( + flask_app # This is the Flask app object, used when running the application via CLI + ) def __enter__(self): return self @@ -114,6 +116,7 @@ def transform(): app = get_flask_app(model_api, flask_app) if flask_app: + # when running application via the command line (e.g., gunicorn worker) pass else: app.run(host, port) From b7c58fab6efce8306d12a71531ff10161fb869f4 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 3 Sep 2025 16:39:36 -0400 Subject: [PATCH 33/36] added is_client_request_timeout_enabled --- .../drum/root_predictors/prediction_server.py | 15 ++++++++++++++- .../datarobot_drum/drum/test_prediction_server.py | 3 ++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index bd0dd3490..45693cacc 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -65,6 +65,10 @@ class TimeoutWSGIRequestHandler(WSGIRequestHandler): timeout = 3600 if RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT"): timeout = int(RuntimeParameters.get("DRUM_CLIENT_REQUEST_TIMEOUT")) + logger.info( + "Client request timeout is enabled, timeout: %s", + str(int(timeout)), + ) class PredictionServer(PredictMixin): @@ -322,6 +326,15 @@ def handle_exception(e): return [] + def is_client_request_timeout_enabled(self): + if ( + 
RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT") + and int(RuntimeParameters.get("DRUM_CLIENT_REQUEST_TIMEOUT")) > 0 + ): + return True + else: + return False + def _run_flask_app(self, app): host = self._params.get("host", None) port = self._params.get("port", None) @@ -355,7 +368,7 @@ def _run_flask_app(self, app): processes=processes, **( {"request_handler": TimeoutWSGIRequestHandler} - if RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT") + if self.is_client_request_timeout_enabled() else {} ), ) diff --git a/tests/unit/datarobot_drum/drum/test_prediction_server.py b/tests/unit/datarobot_drum/drum/test_prediction_server.py index 7e57687f0..5f21846fe 100644 --- a/tests/unit/datarobot_drum/drum/test_prediction_server.py +++ b/tests/unit/datarobot_drum/drum/test_prediction_server.py @@ -235,7 +235,8 @@ def chat_hook(completion_request, model): @pytest.mark.parametrize( - "processes_param, expected_processes, request_timeout", [(None, 1, None), (10, 10, 600)] + "processes_param, expected_processes, request_timeout", + [(None, 1, None), (None, 1, 0), (10, 10, 600)], ) def test_run_flask_app(processes_param, expected_processes, request_timeout): if request_timeout: From 8d1c1aea0ebbccc7dd820d5666cda2425eb61327 Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 3 Sep 2025 16:39:55 -0400 Subject: [PATCH 34/36] added comments --- custom_model_runner/datarobot_drum/drum/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 4abeef1d4..7b74cfda2 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -73,7 +73,10 @@ def main(flask_app=None, worker_ctx=None): config_logging() if worker_ctx: - # Add cleanup when running via the command line (gunicorn worker) + # Perform cleanup specific to the Gunicorn worker being terminated. 
+ # Gunicorn spawns multiple worker processes to handle requests. Each worker has its own context, + # and this ensures that only the resources associated with the current worker are released. + # More details in https://github.com/datarobot/datarobot-custom-templates/pull/419 if runtime.options and RunMode(runtime.options.subparser_name) == RunMode.SERVER: if runtime.cm_runner: worker_ctx.defer_cleanup( From c2b1ef77a4e21fd84d5898d26be52602da28d6cc Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 3 Sep 2025 16:54:12 -0400 Subject: [PATCH 35/36] added comments --- custom_model_runner/datarobot_drum/drum/drum.py | 12 ++++++++++-- custom_model_runner/datarobot_drum/drum/main.py | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/drum.py b/custom_model_runner/datarobot_drum/drum/drum.py index 4fab14fb9..748b98dae 100644 --- a/custom_model_runner/datarobot_drum/drum/drum.py +++ b/custom_model_runner/datarobot_drum/drum/drum.py @@ -502,7 +502,11 @@ def run(self): self._run_predictions(stats_collector) finally: if self.worker_ctx: - # Add cleanup when running via the command line (gunicorn worker) + # Perform cleanup specific to the Gunicorn worker being terminated. + # Gunicorn spawns multiple worker processes to handle requests. Each worker has its own context, + # and this ensures that only the resources associated with the current worker are released. + # defer_cleanup simply saves methods to be executed during worker restart or shutdown. 
+ # More details in https://github.com/datarobot/datarobot-custom-templates/pull/419 self.worker_ctx.defer_cleanup( lambda: stats_collector.disable(), desc="stats_collector.disable()" ) @@ -847,7 +851,11 @@ def _run_predictions(self, stats_collector: Optional[StatsCollector] = None): stats_collector.mark("run") finally: if self.worker_ctx: - # Add cleanup when running via the command line (gunicorn worker) + # Perform cleanup specific to the Gunicorn worker being terminated. + # Gunicorn spawns multiple worker processes to handle requests. Each worker has its own context, + # and this ensures that only the resources associated with the current worker are released. + # defer_cleanup simply saves methods to be executed during worker restart or shutdown. + # More details in https://github.com/datarobot/datarobot-custom-templates/pull/419 if predictor is not None: self.worker_ctx.defer_cleanup( lambda: predictor.terminate(), desc="predictor.terminate()" diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index 7b74cfda2..ff5219ca4 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -76,6 +76,7 @@ def main(flask_app=None, worker_ctx=None): # Perform cleanup specific to the Gunicorn worker being terminated. # Gunicorn spawns multiple worker processes to handle requests. Each worker has its own context, # and this ensures that only the resources associated with the current worker are released. + # defer_cleanup simply saves methods to be executed during worker restart or shutdown. 
# More details in https://github.com/datarobot/datarobot-custom-templates/pull/419 if runtime.options and RunMode(runtime.options.subparser_name) == RunMode.SERVER: if runtime.cm_runner: From b976d6bc15823f07bd9948f647fbb433e3eaecbe Mon Sep 17 00:00:00 2001 From: Sergey Gavrenkov Date: Wed, 3 Sep 2025 23:07:04 -0400 Subject: [PATCH 36/36] fix signal termination --- custom_model_runner/datarobot_drum/drum/main.py | 5 +++-- .../drum/root_predictors/prediction_server.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/custom_model_runner/datarobot_drum/drum/main.py b/custom_model_runner/datarobot_drum/drum/main.py index ff5219ca4..a6522329b 100644 --- a/custom_model_runner/datarobot_drum/drum/main.py +++ b/custom_model_runner/datarobot_drum/drum/main.py @@ -129,8 +129,9 @@ def signal_handler(sig, frame): runtime.metric_provider = metric_provider runtime.log_provider = log_provider - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) + if worker_ctx is None: + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) from datarobot_drum.drum.drum import CMRunner diff --git a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py index 45693cacc..5758d6bf9 100644 --- a/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py +++ b/custom_model_runner/datarobot_drum/drum/root_predictors/prediction_server.py @@ -65,10 +65,6 @@ class TimeoutWSGIRequestHandler(WSGIRequestHandler): timeout = 3600 if RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT"): timeout = int(RuntimeParameters.get("DRUM_CLIENT_REQUEST_TIMEOUT")) - logger.info( - "Client request timeout is enabled, timeout: %s", - str(int(timeout)), - ) class PredictionServer(PredictMixin): @@ -331,6 +327,10 @@ def is_client_request_timeout_enabled(self): 
RuntimeParameters.has("DRUM_CLIENT_REQUEST_TIMEOUT") and int(RuntimeParameters.get("DRUM_CLIENT_REQUEST_TIMEOUT")) > 0 ): + logger.info( + "Client request timeout is enabled, timeout: %s", + str(int(TimeoutWSGIRequestHandler.timeout)), + ) return True else: return False