-
Notifications
You must be signed in to change notification settings - Fork 749
Independent Dynamic Scaling for different Activities in Temporal WorkFlow #4159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
0ca2cb7
383a7cd
a428b4b
7b98621
af5e22b
c6f8647
4bdc43a
c31b0ca
bb49427
85959e3
82b4dc0
04b2b6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -72,6 +72,19 @@ public ActivityOptions buildActivityOptions(Properties props, boolean setHeartbe | |||||||||||||||||||||||||
| .build(); | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| public ActivityOptions buildActivityOptions(Properties props, boolean setHeartbeatTimeout, String taskQueue) { | ||||||||||||||||||||||||||
| ActivityOptions.Builder builder = ActivityOptions.newBuilder() | ||||||||||||||||||||||||||
| .setStartToCloseTimeout(getStartToCloseTimeout(props)) | ||||||||||||||||||||||||||
| .setRetryOptions(buildRetryOptions(props)) | ||||||||||||||||||||||||||
| .setTaskQueue(taskQueue); | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| if (setHeartbeatTimeout) { | ||||||||||||||||||||||||||
| builder.setHeartbeatTimeout(getHeartbeatTimeout(props)); | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| return builder.build(); | ||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
|
Comment on lines
+75
to
+87
|
||||||||||||||||||||||||||
| public ActivityOptions buildActivityOptions(Properties props, boolean setHeartbeatTimeout, String taskQueue) { | |
| ActivityOptions.Builder builder = ActivityOptions.newBuilder() | |
| .setStartToCloseTimeout(getStartToCloseTimeout(props)) | |
| .setRetryOptions(buildRetryOptions(props)) | |
| .setTaskQueue(taskQueue); | |
| if (setHeartbeatTimeout) { | |
| builder.setHeartbeatTimeout(getHeartbeatTimeout(props)); | |
| } | |
| return builder.build(); | |
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -25,13 +25,16 @@ | |||||
| import lombok.extern.slf4j.Slf4j; | ||||||
|
|
||||||
| import org.apache.gobblin.runtime.JobState; | ||||||
| import org.apache.gobblin.temporal.GobblinTemporalConfigurationKeys; | ||||||
| import org.apache.gobblin.temporal.ddm.activity.RecommendScalingForWorkUnits; | ||||||
| import org.apache.gobblin.temporal.ddm.work.TimeBudget; | ||||||
| import org.apache.gobblin.temporal.ddm.work.WorkUnitsSizeSummary; | ||||||
| import org.apache.gobblin.temporal.ddm.workflow.WorkflowStage; | ||||||
| import org.apache.gobblin.temporal.dynamic.ProfileDerivation; | ||||||
| import org.apache.gobblin.temporal.dynamic.ProfileOverlay; | ||||||
| import org.apache.gobblin.temporal.dynamic.ScalingDirective; | ||||||
| import org.apache.gobblin.temporal.dynamic.WorkforceProfiles; | ||||||
| import org.apache.gobblin.yarn.GobblinYarnConfigurationKeys; | ||||||
|
|
||||||
|
|
||||||
| /** | ||||||
|
|
@@ -62,8 +65,9 @@ public List<ScalingDirective> recommendScaling(WorkUnitsSizeSummary remainingWor | |||||
| protected abstract int calcDerivationSetPoint(WorkUnitsSizeSummary remainingWork, String sourceClass, TimeBudget timeBudget, JobState jobState); | ||||||
|
|
||||||
| protected ProfileDerivation calcProfileDerivation(String basisProfileName, WorkUnitsSizeSummary remainingWork, String sourceClass, JobState jobState) { | ||||||
| // TODO: implement right-sizing!!! (for now just return unchanged) | ||||||
| return new ProfileDerivation(basisProfileName, ProfileOverlay.unchanged()); | ||||||
| // Create overlay with execution-specific memory and worker class | ||||||
| ProfileOverlay overlay = createExecutionWorkerOverlay(jobState); | ||||||
| return new ProfileDerivation(basisProfileName, overlay); | ||||||
| } | ||||||
|
|
||||||
| protected String calcProfileDerivationName(JobState jobState) { | ||||||
|
|
@@ -72,6 +76,28 @@ protected String calcProfileDerivationName(JobState jobState) { | |||||
| } | ||||||
|
|
||||||
| protected String calcBasisProfileName(JobState jobState) { | ||||||
| return WorkforceProfiles.BASELINE_NAME; // always build upon baseline | ||||||
| // Always derive from the global baseline | ||||||
| return WorkforceProfiles.BASELINE_NAME; | ||||||
| } | ||||||
|
|
||||||
| private ProfileOverlay createExecutionWorkerOverlay(JobState jobState) { | ||||||
| List<ProfileOverlay.KVPair> overlayPairs = new java.util.ArrayList<>(); | ||||||
|
|
||||||
| // Add execution-specific memory if configured (overrides baseline memory) | ||||||
| if (jobState.contains(GobblinTemporalConfigurationKeys.WORK_EXECUTION_MEMORY_MB)) { | ||||||
| overlayPairs.add(new ProfileOverlay.KVPair( | ||||||
| GobblinYarnConfigurationKeys.CONTAINER_MEMORY_MBS_KEY, | ||||||
| jobState.getProp(GobblinTemporalConfigurationKeys.WORK_EXECUTION_MEMORY_MB) | ||||||
| )); | ||||||
| } | ||||||
|
|
||||||
| // Add ExecutionWorker class to ensure correct task queue routing | ||||||
| overlayPairs.add(new ProfileOverlay.KVPair( | ||||||
| GobblinTemporalConfigurationKeys.WORKER_CLASS, | ||||||
| GobblinTemporalConfigurationKeys.EXECUTION_WORKER_CLASS | ||||||
| )); | ||||||
|
|
||||||
| return overlayPairs.isEmpty() ? ProfileOverlay.unchanged() : new ProfileOverlay.Adding(overlayPairs); | ||||||
|
||||||
| return overlayPairs.isEmpty() ? ProfileOverlay.unchanged() : new ProfileOverlay.Adding(overlayPairs); | |
| return new ProfileOverlay.Adding(overlayPairs); |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,86 @@ | ||||||
| /* | ||||||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||||||
| * contributor license agreements. See the NOTICE file distributed with | ||||||
| * this work for additional information regarding copyright ownership. | ||||||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||||||
| * (the "License"); you may not use this file except in compliance with | ||||||
| * the License. You may obtain a copy of the License at | ||||||
| * | ||||||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||||||
| * | ||||||
| * Unless required by applicable law or agreed to in writing, software | ||||||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
| * See the License for the specific language governing permissions and | ||||||
| * limitations under the License. | ||||||
| */ | ||||||
|
|
||||||
| package org.apache.gobblin.temporal.ddm.worker; | ||||||
|
|
||||||
| import java.util.concurrent.TimeUnit; | ||||||
|
|
||||||
| import com.typesafe.config.Config; | ||||||
|
|
||||||
| import io.temporal.client.WorkflowClient; | ||||||
| import io.temporal.worker.WorkerOptions; | ||||||
|
|
||||||
| import org.apache.gobblin.temporal.GobblinTemporalConfigurationKeys; | ||||||
| import org.apache.gobblin.temporal.cluster.AbstractTemporalWorker; | ||||||
| import org.apache.gobblin.temporal.ddm.activity.impl.ProcessWorkUnitImpl; | ||||||
| import org.apache.gobblin.temporal.ddm.workflow.impl.NestingExecOfProcessWorkUnitWorkflowImpl; | ||||||
| import org.apache.gobblin.temporal.ddm.workflow.impl.ProcessWorkUnitsWorkflowImpl; | ||||||
| import org.apache.gobblin.util.ConfigUtils; | ||||||
|
|
||||||
|
|
||||||
| /** | ||||||
| * Specialized worker for the Work Execution stage. | ||||||
| * This worker only registers activities for: | ||||||
| * - ProcessWorkUnit (Work Execution) | ||||||
| * | ||||||
| * Runs on containers with stage-specific memory for work execution operations. | ||||||
| * Polls the execution task queue to ensure activities run on appropriately-sized containers. | ||||||
| */ | ||||||
| public class ExecutionWorker extends AbstractTemporalWorker { | ||||||
| public static final long DEADLOCK_DETECTION_TIMEOUT_SECONDS = 120; | ||||||
| public int maxExecutionConcurrency; | ||||||
|
||||||
| public int maxExecutionConcurrency; | |
| private int maxExecutionConcurrency; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.gobblin.temporal.ddm.workflow; | ||
|
|
||
| import com.typesafe.config.Config; | ||
| import lombok.Getter; | ||
|
|
||
| import org.apache.gobblin.temporal.GobblinTemporalConfigurationKeys; | ||
|
|
||
| /** | ||
| * Represents the different stages of a Gobblin Temporal workflow. | ||
| * | ||
| * <p>Stages: | ||
| * <ul> | ||
| * <li>WORK_DISCOVERY: Discovers data sources, generates work units (uses default queue)</li> | ||
| * <li>WORK_EXECUTION: Processes work units to transform and load data (uses execution queue when dynamic scaling enabled)</li> | ||
| * <li>COMMIT: Commits work units (uses default queue)</li> | ||
| * </ul> | ||
| * | ||
| * <p>Queue routing: | ||
| * <ul> | ||
| * <li>Dynamic scaling OFF: All stages use default queue</li> | ||
| * <li>Dynamic scaling ON: WORK_EXECUTION uses dedicated execution queue, others use default queue</li> | ||
| * </ul> | ||
| */ | ||
| @Getter | ||
| public enum WorkflowStage { | ||
| WORK_DISCOVERY("workDiscovery", GobblinTemporalConfigurationKeys.GOBBLIN_TEMPORAL_TASK_QUEUE, | ||
| GobblinTemporalConfigurationKeys.DEFAULT_GOBBLIN_TEMPORAL_TASK_QUEUE), | ||
| WORK_EXECUTION("workExecution", GobblinTemporalConfigurationKeys.EXECUTION_TASK_QUEUE, | ||
| GobblinTemporalConfigurationKeys.DEFAULT_EXECUTION_TASK_QUEUE), | ||
| COMMIT("commit", GobblinTemporalConfigurationKeys.GOBBLIN_TEMPORAL_TASK_QUEUE, | ||
| GobblinTemporalConfigurationKeys.DEFAULT_GOBBLIN_TEMPORAL_TASK_QUEUE); | ||
|
|
||
| private final String profileBaseName; | ||
| private final String taskQueueConfigKey; | ||
| private final String defaultTaskQueue; | ||
|
|
||
| WorkflowStage(String profileBaseName, String taskQueueConfigKey, String defaultTaskQueue) { | ||
| this.profileBaseName = profileBaseName; | ||
| this.taskQueueConfigKey = taskQueueConfigKey; | ||
| this.defaultTaskQueue = defaultTaskQueue; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the task queue name for this stage: the value configured under this stage's task queue config key if present, otherwise this stage's default task queue. | ||
| * Example: "GobblinTemporalDiscoveryCommitQueue", "GobblinTemporalExecutionQueue" | ||
| * | ||
| * @param config the configuration to read from | ||
| * @return the task queue name for this stage | ||
| */ | ||
| public String getTaskQueue(Config config) { | ||
| return config.hasPath(taskQueueConfigKey) | ||
| ? config.getString(taskQueueConfigKey) | ||
| : defaultTaskQueue; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a documentation comment for the WORK_EXECUTION_MEMORY_MB configuration key to explain its purpose and usage, similar to other configuration keys in this interface. The comment should clarify that this is the memory allocation in megabytes for execution worker containers when dynamic scaling is enabled.