-
Notifications
You must be signed in to change notification settings - Fork 1
DM-47970: Make raw deletion more thorough. #315
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1834,30 +1834,36 @@ def _query_datasets_by_storage_class(self, butler, exposure_ids, collections, st | |
| ) for t in matching_types | ||
| ) | ||
|
|
||
| def clean_local_repo(self, exposure_ids: set[int]) -> None: | ||
| def clean_local_repo(self) -> None: | ||
| """Remove local repo content that is only needed for a single visit. | ||
|
|
||
| This includes raws and pipeline outputs. | ||
|
|
||
| Parameter | ||
| --------- | ||
| exposure_ids : `set` [`int`] | ||
| Identifiers of the exposures to be removed. | ||
| """ | ||
| with lsst.utils.timer.time_this(_log, msg="clean_local_repo", level=logging.DEBUG): | ||
| self.butler.registry.refresh() | ||
| if exposure_ids: | ||
| raws = self.butler.query_datasets( | ||
| 'raw', | ||
| collections=self.instrument.makeDefaultRawIngestRunName(), | ||
| where=f"exposure in ({', '.join(str(x) for x in exposure_ids)})", | ||
| find_first=False, | ||
| explain=False, # Raws might not have been ingested. | ||
| instrument=self.visit.instrument, | ||
| detector=self.visit.detector, | ||
| ) | ||
| _log_trace.debug("Removing %d raws for exposures %s.", len(raws), exposure_ids) | ||
| self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True) | ||
|
|
||
| # Clean out raws | ||
| raws = self.butler.query_datasets( | ||
| "raw", | ||
| collections=self.instrument.makeDefaultRawIngestRunName(), | ||
| find_first=False, | ||
| explain=False, # Raws might not have been ingested. | ||
| instrument=self.visit.instrument, | ||
| detector=self.visit.detector, | ||
| ) | ||
| n_raws = len(raws) | ||
| if n_raws == 0: | ||
| _log_trace.debug("No raws to remove for detector %s.", self.visit.detector) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the "for detector" bit really makes sense here -- the point is if we accidentally left some old raws because of a logic bug, they'll get cleaned up too. |
||
| else: | ||
| _log_trace.debug("Removing %d raw(s) for detector %s.", n_raws, self.visit.detector) | ||
| try: | ||
| self.butler.pruneDatasets(raws, disassociate=True, unstore=True, purge=True) | ||
| except Exception: | ||
| _log_trace.exception("Raw removal failed for detector %s.", self.visit.detector) | ||
| raise | ||
|
Comment on lines
+1861
to
+1863
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please do not use log-and-reraise -- exceptions should only be logged once, when they are handled. If this were a reasonable place to log the exception, it would not make sense to log it at trace level (if something unexpectedly goes wrong, that's at least a warning). |
||
| _log_trace.debug( | ||
| "Successfully removed %d raw(s) for detector %s.", n_raws, self.visit.detector) | ||
|
|
||
| # Outputs are all in their own runs, so just drop them. | ||
| preload_run = runs.get_preload_run(self.instrument, self._deployment, self._day_obs) | ||
| _remove_run_completely(self.butler, preload_run) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, almost missed this -- please delete all raws unconditionally (maybe with an
instrumentconstraint, but I think the query will resolve just fine without it). That's kind of the point...