From 4eedd86576e9f6630ef4c6fd6aaf1369cd4b8f61 Mon Sep 17 00:00:00 2001 From: Azamat G Date: Thu, 16 Mar 2023 19:22:15 +0300 Subject: [PATCH] Add flag to use cluster spark session --- replay/session_handler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/replay/session_handler.py b/replay/session_handler.py index 9a2d26c5b..b27a4b785 100644 --- a/replay/session_handler.py +++ b/replay/session_handler.py @@ -24,6 +24,9 @@ def get_spark_session( 70% of RAM by default. :param shuffle_partitions: number of partitions for Spark; triple CPU count by default """ + if os.environ.get("SCRIPT_ENV", None) == "cluster": + return SparkSession.builder.getOrCreate() + os.environ["PYSPARK_PYTHON"] = sys.executable os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable