Skip to content

Commit 1262810

Browse files
feat(rf): async preprocessing lambda
1 parent 8d478df commit 1262810

File tree

3 files changed

+242
-48
lines changed

3 files changed

+242
-48
lines changed

.pre-commit-config.yaml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,4 @@ repos:
1818
stages: [commit-msg]
1919
verbose: true
2020
entry: bash -c 'commitlint --edit || exit 0'
21-
- repo: local
22-
hooks:
23-
- id: zizmor
24-
name: zizmor
25-
entry: bash -c 'poetry run zizmor .github/workflows/*' --
26-
language: system
27-
pass_filenames: false
28-
# - repo: https://github.com/woodruffw/zizmor-pre-commit
29-
# rev: v1.5.2
30-
# hooks:
31-
# - id: zizmor
32-
# args: [ .github/workflows/* ]
3321

tidy3d/web/api/webapi.py

Lines changed: 206 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from tidy3d.web.core.http_util import get_version as _get_protocol_version
4242
from tidy3d.web.core.http_util import http
4343
from tidy3d.web.core.task_core import BatchDetail, BatchTask, Folder, SimulationTask
44-
from tidy3d.web.core.task_info import ChargeType, TaskInfo
44+
from tidy3d.web.core.task_info import AsyncJobDetail, ChargeType, TaskInfo
4545
from tidy3d.web.core.types import PayType
4646

4747
from .connect_util import REFRESH_TIME, get_grid_points_str, get_time_steps_str, wait_for_connection
@@ -95,6 +95,206 @@ def _batch_detail(resource_id: str):
9595
return BatchTask(resource_id).detail(batch_type="RF_SWEEP")
9696

9797

98+
def _batch_detail_error(resource_id: str) -> Optional[WebError]:
99+
"""Processes a failed batch job to generate a detailed error.
100+
101+
This function inspects the status of a batch detail object. If the status
102+
indicates an error, it logs the failure and constructs a specific `WebError`
103+
object to be returned. For validation failures, it parses and aggregates
104+
detailed error messages from each subtask.
105+
106+
Args:
107+
resource_id (str): The identifier of the batch resource that failed.
108+
109+
Returns:
110+
An instance of `WebError` if the batch failed, otherwise `None`.
111+
"""
112+
try:
113+
batch_detail = BatchTask(batch_id=resource_id).detail(batch_type="RF_SWEEP")
114+
status = batch_detail.totalStatus.value
115+
except Exception as e:
116+
log.error(f"Could not retrieve batch details for '{resource_id}': {e}")
117+
return WebError(f"Failed to retrieve status for batch '{resource_id}'.")
118+
119+
if status not in ERROR_STATES:
120+
return None
121+
122+
log.error(f"The ComponentModeler batch '{resource_id}' has failed with status: {status}")
123+
124+
if (
125+
status == "validate_fail"
126+
and hasattr(batch_detail, "validateErrors")
127+
and batch_detail.validateErrors
128+
):
129+
error_details = []
130+
for key, error_str in batch_detail.validateErrors.items():
131+
try:
132+
error_dict = json.loads(error_str)
133+
validation_error = error_dict.get("validation_error", "Unknown validation error.")
134+
msg = f"- Subtask '{key}' failed: {validation_error}"
135+
log.error(msg)
136+
error_details.append(msg)
137+
except (json.JSONDecodeError, TypeError):
138+
# Handle cases where the error string isn't valid JSON
139+
log.error(f"Could not parse validation error for subtask '{key}'.")
140+
error_details.append(f"- Subtask '{key}': Could not parse error details.")
141+
142+
details_string = "\n".join(error_details)
143+
full_error_msg = (
144+
"One or more subtasks failed validation. Please fix the component modeler configuration.\n"
145+
f"Details:\n{details_string}"
146+
)
147+
return WebError(full_error_msg)
148+
149+
# Handle all other generic error states
150+
else:
151+
error_msg = (
152+
f"Batch '{resource_id}' failed with status '{status}'. Check server "
153+
"logs for details or contact customer support."
154+
)
155+
return WebError(error_msg)
156+
157+
158+
def _upload_component_modeler_subtasks(
159+
resource_id: str, verbose: bool = True, solver_version: Optional[str] = None
160+
):
161+
"""Kicks off and monitors the split and validation of component modeler tasks.
162+
163+
This function orchestrates a two-phase process. First, it initiates a
164+
server-side asynchronous job to split the components of a modeler batch.
165+
It monitors this job's progress by polling the API and parsing the
166+
response into an `AsyncJobDetail` model until the job completes or fails.
167+
168+
If the split is successful, the function proceeds to the second phase:
169+
triggering a batch validation via `batch.check()`. It then monitors this
170+
validation process by polling for `BatchDetail` updates. The progress bar,
171+
if verbose, reflects the status according to a predefined state mapping.
172+
173+
Finally, it processes the terminal state of the validation. If a
174+
'validate_fail' status occurs, it parses detailed error messages for each
175+
failed subtask and includes them in the raised exception.
176+
177+
Args:
178+
resource_id (str): The identifier for the batch resource to be processed.
179+
verbose (bool): If True, displays progress bars and logs detailed
180+
status messages to the console during the operation.
181+
solver_version (str): Solver version in which to run validation.
182+
183+
Raises:
184+
RuntimeError: If the initial asynchronous split job fails.
185+
WebError: If the subsequent batch validation fails, ends in an
186+
unexpected state, or if a 'validate_fail' status is encountered.
187+
"""
188+
console = get_logging_console() if verbose else None
189+
final_error = None
190+
batch_type = "RF_SWEEP"
191+
192+
split_path = "tidy3d/async-biz/component-modeler-split"
193+
payload = {
194+
"batchType": batch_type,
195+
"batchId": resource_id,
196+
"fileName": "modeler.hdf5.gz",
197+
"protocolVersion": _get_protocol_version(),
198+
}
199+
200+
if verbose:
201+
console.log("Starting Modeler and Subtasks Validation...")
202+
203+
initial_resp = http.post(split_path, payload)
204+
split_job_detail = AsyncJobDetail(**initial_resp)
205+
monitor_split_path = f"{split_path}?asyncId={split_job_detail.asyncId}"
206+
207+
if verbose:
208+
progress_bar = Progress(
209+
TextColumn("[progress.description]{task.description}"),
210+
BarColumn(),
211+
TaskProgressColumn(),
212+
TimeElapsedColumn(),
213+
console=console,
214+
)
215+
216+
with progress_bar as progress:
217+
description = "Upload Subtasks"
218+
pbar = progress.add_task(description, completed=split_job_detail.progress, total=100)
219+
while True:
220+
split_job_raw_result = http.get(monitor_split_path)
221+
split_job_detail = AsyncJobDetail(**split_job_raw_result)
222+
223+
progress.update(
224+
pbar, completed=split_job_detail.progress, description=f"[blue]{description}"
225+
)
226+
227+
if split_job_detail.status in END_STATES:
228+
progress.update(
229+
pbar,
230+
completed=split_job_detail.progress,
231+
description=f"[green]{description}",
232+
)
233+
break
234+
time.sleep(RUN_REFRESH_TIME)
235+
236+
if split_job_detail.status in ERROR_STATES:
237+
msg = split_job_detail.message or "An unknown error occurred."
238+
final_error = WebError(f"Component modeler split job failed: {msg}")
239+
240+
if not final_error:
241+
description = "Validating"
242+
pbar = progress.add_task(
243+
completed=10, total=100, description=f"[blue]{description}"
244+
)
245+
batch = BatchTask(resource_id)
246+
batch.check(solver_version=solver_version, batch_type=batch_type)
247+
248+
while True:
249+
batch_detail = batch.detail(batch_type=batch_type)
250+
status = batch_detail.totalStatus
251+
progress_percent = STATE_PROGRESS_PERCENTAGE.get(status, 0)
252+
progress.update(
253+
pbar, completed=progress_percent, description=f"[blue]{description}"
254+
)
255+
256+
if status in POST_VALIDATE_STATES:
257+
progress.update(pbar, completed=100, description=f"[green]{description}")
258+
task_mapping = json.loads(split_job_detail.result)
259+
console.log(
260+
f"Uploaded Subtasks: \n{_task_dict_to_url_bullet_list(task_mapping)}"
261+
)
262+
progress.refresh()
263+
break
264+
elif status in ERROR_STATES:
265+
progress.update(pbar, completed=0, description=f"[red]{description}")
266+
progress.refresh()
267+
break
268+
time.sleep(RUN_REFRESH_TIME)
269+
270+
else:
271+
# Non-verbose mode: Poll for split job completion.
272+
while True:
273+
split_job_raw_result = http.get(monitor_split_path)
274+
split_job_detail = AsyncJobDetail(**split_job_raw_result)
275+
if split_job_detail.status in END_STATES:
276+
break
277+
time.sleep(RUN_REFRESH_TIME)
278+
279+
# Check for split job failure.
280+
if split_job_detail.status in ERROR_STATES:
281+
msg = split_job_detail.message or "An unknown error occurred."
282+
final_error = WebError(f"Component modeler split job failed: {msg}")
283+
284+
# If split succeeded, poll for validation completion.
285+
if not final_error:
286+
batch = BatchTask(resource_id)
287+
batch.check(solver_version=solver_version, batch_type=batch_type)
288+
while True:
289+
batch_detail = batch.detail(batch_type=batch_type)
290+
status = batch_detail.totalStatus
291+
if status in POST_VALIDATE_STATES or status in END_STATES:
292+
break
293+
time.sleep(RUN_REFRESH_TIME)
294+
295+
return _batch_detail_error(resource_id=resource_id)
296+
297+
98298
def _task_dict_to_url_bullet_list(data_dict: dict) -> str:
99299
"""
100300
Converts a dictionary into a string formatted as a bullet point list.
@@ -107,6 +307,8 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str:
107307
"""
108308
# Use a list comprehension to format each key-value pair
109309
# and then join them together with newline characters.
310+
if data_dict is None:
311+
raise WebError("Error in subtask dictionary data.")
110312
return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()])
111313

112314

@@ -391,26 +593,7 @@ def upload(
391593
)
392594

393595
if task_type == "RF":
394-
split_path = "tidy3d/projects/component-modeler-split"
395-
payload = {
396-
"batchType": "RF_SWEEP",
397-
"batchId": resource_id,
398-
"fileName": "modeler.hdf5.gz",
399-
"protocolVersion": _get_protocol_version(),
400-
}
401-
resp = http.post(split_path, payload)
402-
if verbose:
403-
console = get_logging_console()
404-
console.log(
405-
f"Child simulation subtasks are being uploaded to \n{_task_dict_to_url_bullet_list(resp)}"
406-
)
407-
# split (modeler-specific)
408-
batch = BatchTask(resource_id)
409-
# Kick off server-side validation for the RF batch.
410-
batch.check(solver_version=solver_version, batch_type="RF_SWEEP")
411-
if verbose:
412-
# Validation phase
413-
console.log("Validating component modeler and subtask simulations...")
596+
_upload_component_modeler_subtasks(resource_id=resource_id, verbose=verbose)
414597

415598
estimate_cost(task_id=resource_id, solver_version=solver_version, verbose=verbose)
416599

@@ -555,7 +738,7 @@ def start(
555738
solver_version=solver_version, batch_type="RF_SWEEP", worker_group=worker_group
556739
)
557740
if verbose:
558-
console.log(f"Component Modeler '{task_id}' validation succeeded. Starting to solve...")
741+
console.log(f"Component Modeler '{task_id}' validated. Solving...")
559742
return
560743

561744
if priority is not None and (priority < 1 or priority > 10):
@@ -1407,20 +1590,7 @@ def estimate_cost(
14071590
return est_flex_unit
14081591

14091592
elif status in ERROR_STATES:
1410-
log.error(f"The ComponentModeler '{task_id}' has failed: {status}")
1411-
1412-
if status == "validate_fail":
1413-
assert d.validateErrors is not None
1414-
for key, error in d.validateErrors.items():
1415-
# I don't like this ideally but would like to control the endpoint to make this better
1416-
error_dict = json.loads(error)
1417-
validation_error = error_dict["validation_error"]
1418-
log.error(
1419-
f"Subtask '{key}' has failed to validate:"
1420-
f" \n {validation_error} \n "
1421-
f"Fix your component modeler configuration. "
1422-
f"Generate subtask simulations locally using `ComponentModelerType.sim_dict`."
1423-
)
1593+
return _batch_detail_error(resource_id=task_id)
14241594

14251595
raise WebError("Could not get estimated cost!")
14261596

tidy3d/web/core/task_info.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,3 +317,39 @@ class BatchDetail(TaskBase):
317317
message: str = None
318318
tasks: list[BatchMember] = []
319319
validateErrors: dict = None
320+
321+
322+
class AsyncJobDetail(TaskBase):
323+
"""
324+
Provides a detailed view of an asynchronous job and its sub-tasks.
325+
326+
This model represents a long-running operation. The 'result' attribute holds
327+
the output of a completed job, which for orchestration jobs, is often a
328+
JSON string mapping sub-task names to their unique IDs.
329+
330+
Attributes:
331+
asyncId: The unique identifier for the asynchronous job.
332+
status: The current overall status of the job (e.g., 'RUNNING', 'COMPLETED').
333+
progress: The completion percentage of the job (from 0.0 to 100.0).
334+
createdAt: The timestamp when the job was created.
335+
completedAt: The timestamp when the job finished (successfully or not).
336+
tasks: A dictionary mapping logical task keys to their unique task IDs.
337+
This is often populated by parsing the 'result' of an orchestration task.
338+
result: The raw string output of the completed job. If the job spawns other
339+
tasks, this is expected to be a JSON string detailing those tasks.
340+
taskBlockInfo: Information on any dependencies blocking the job from running.
341+
message: A human-readable message about the job's status.
342+
"""
343+
344+
asyncId: str
345+
status: str
346+
progress: Optional[float] = None
347+
createdAt: Optional[datetime] = None
348+
completedAt: Optional[datetime] = None
349+
tasks: Optional[dict[str, str]] = None
350+
result: Optional[str] = None
351+
taskBlockInfo: Optional[TaskBlockInfo] = None
352+
message: Optional[str] = None
353+
354+
355+
AsyncJobDetail.update_forward_refs()

0 commit comments

Comments
 (0)