Skip to content

Commit 5f76b6c

Browse files
Add execution start and end time metadata for code cells (#29)
* add-execution-timing-metadata * add test for timing metadata * add additional config * lint fix * add ydoc as test dependency * improve code * drop stop_extension() and del fim * add jupyter-server-fileid in test-dependencies * resolve CI stall * add additional plugins * Attempt to fix the test issues, cleanup * merge conflict resolve * add execution start event * export fix * code improvements * clear execute metadata * Also record `iopub.execute_input` --------- Co-authored-by: krassowski <[email protected]>
1 parent ef8181c commit 5f76b6c

File tree

7 files changed

+221
-11
lines changed

7 files changed

+221
-11
lines changed

conftest.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,33 @@
11
import pytest
22

3-
pytest_plugins = ("pytest_jupyter.jupyter_server",)
3+
pytest_plugins = [
4+
"pytest_jupyter.jupyter_server",
5+
"jupyter_server.pytest_plugin",
6+
"jupyter_server_fileid.pytest_plugin",
7+
"jupyter_server_ydoc.pytest_plugin"
8+
]
49

510

611
@pytest.fixture
7-
def jp_server_config(jp_server_config):
12+
def jp_server_config(jp_root_dir, jp_server_config):
813
return {
9-
"ServerApp": {
10-
"jpserver_extensions": {"jupyter_server_nbmodel": True, "jupyter_server_ydoc": False}
14+
'ServerApp': {
15+
'jpserver_extensions': {
16+
'jupyter_server_ydoc': True,
17+
'jupyter_server_fileid': True,
18+
'jupyter_server_nbmodel': True,
19+
},
20+
'token': '',
21+
'password': '',
22+
'disable_check_xsrf': True
23+
},
24+
"SQLiteYStore": {"db_path": str(jp_root_dir.joinpath(".rtc_test.db"))},
25+
"BaseFileIdManager": {
26+
"root_dir": str(jp_root_dir),
27+
"db_path": str(jp_root_dir.joinpath(".fid_test.db")),
28+
"db_journal_mode": "OFF",
29+
},
30+
"YDocExtension": {
31+
"document_save_delay": 1
1132
}
1233
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from jupyter_events import EventLogger
2+
import pathlib
3+
4+
_JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server_nbmodel"
5+
_DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas"
6+
7+
class _EventLogger:
8+
_event_logger = None
9+
10+
@classmethod
11+
def init_event_logger(cls) -> EventLogger:
12+
"""Initialize or return the existing Event Logger."""
13+
if cls._event_logger is None:
14+
cls._event_logger = EventLogger()
15+
schema_ids = [
16+
"https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
17+
]
18+
for schema_id in schema_ids:
19+
rel_schema_path = schema_id.replace(_JUPYTER_SERVER_EVENTS_URI + "/", "") + ".yaml"
20+
schema_path = _DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path
21+
cls._event_logger.register_event_schema(schema_path)
22+
return cls._event_logger
23+
24+
25+
event_logger = _EventLogger.init_event_logger()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"$id": https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1
2+
version: "1"
3+
title: Cell Execution activities
4+
personal-data: true
5+
description: |
6+
Record events of a cell execution.
7+
type: object
8+
required:
9+
- event_type
10+
- cell_id
11+
- document_id
12+
- timestamp
13+
properties:
14+
event_type:
15+
enum:
16+
- execution_start
17+
- execution_end
18+
19+
cell_id:
20+
type: string
21+
description: |
22+
Cell id.
23+
24+
document_id:
25+
type: string
26+
description: |
27+
Document id.
28+
29+
success:
30+
type: boolean
31+
description: |
32+
Whether the cell execution was successful or not.
33+
34+
kernel_error:
35+
type: string
36+
description: |
37+
Error message from the kernel.
38+
39+
timestamp:
40+
type: string
41+
description: |
42+
Timestamp of the event in ISO format.

jupyter_server_nbmodel/handlers.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from dataclasses import asdict, dataclass
99
from functools import partial
1010
from http import HTTPStatus
11+
from datetime import datetime, timezone
1112

1213
import jupyter_server
1314
import jupyter_server.services
@@ -18,8 +19,10 @@
1819
from jupyter_core.utils import ensure_async
1920
from jupyter_server.base.handlers import APIHandler
2021
from jupyter_server.extension.handler import ExtensionHandlerMixin
22+
from jupyter_events import EventLogger
2123

2224
from .log import get_logger
25+
from .event_logger import event_logger
2326

2427
if t.TYPE_CHECKING:
2528
import jupyter_client
@@ -123,7 +126,6 @@ async def _get_ycell(
123126
raise KeyError(
124127
msg,
125128
)
126-
127129
return ycell
128130

129131

@@ -199,6 +201,13 @@ def _stdin_hook(kernel_id: str, request_id: str, pending_input: PendingInput, ms
199201
parent_header=header, input_request=InputRequest(**msg["content"])
200202
)
201203

204+
def _get_error(outputs):
205+
return "\n".join(
206+
f"{output['ename']}: {output['evalue']}"
207+
for output in outputs
208+
if output.get("output_type") == "error"
209+
)
210+
202211

203212
async def _execute_snippet(
204213
client: jupyter_client.asynchronous.client.AsyncKernelClient,
@@ -219,15 +228,34 @@ async def _execute_snippet(
219228
The execution status and outputs.
220229
"""
221230
ycell = None
231+
time_info = {}
222232
if metadata is not None:
223233
ycell = await _get_ycell(ydoc, metadata)
224234
if ycell is not None:
235+
execution_start_time = datetime.now(timezone.utc).isoformat()[:-6]
225236
# Reset cell
226237
with ycell.doc.transaction():
227238
del ycell["outputs"][:]
228239
ycell["execution_count"] = None
229240
ycell["execution_state"] = "running"
230-
241+
if "execution" in ycell["metadata"]:
242+
del ycell["metadata"]["execution"]
243+
if metadata.get("record_timing", False):
244+
time_info = ycell["metadata"].get("execution", {})
245+
time_info["shell.execute_reply.started"] = execution_start_time
246+
# for compatibility with jupyterlab-execute-time also set:
247+
time_info["iopub.execute_input"] = execution_start_time
248+
ycell["metadata"]["execution"] = time_info
249+
# Emit cell execution start event
250+
event_logger.emit(
251+
schema_id="https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
252+
data={
253+
"event_type": "execution_start",
254+
"cell_id": metadata["cell_id"],
255+
"document_id": metadata["document_id"],
256+
"timestamp": execution_start_time
257+
}
258+
)
231259
outputs = []
232260

233261
# FIXME we don't check if the session is consistent (aka the kernel is linked to the document)
@@ -244,10 +272,28 @@ async def _execute_snippet(
244272
reply_content = reply["content"]
245273

246274
if ycell is not None:
275+
execution_end_time = datetime.now(timezone.utc).isoformat()[:-6]
247276
with ycell.doc.transaction():
248277
ycell["execution_count"] = reply_content.get("execution_count")
249278
ycell["execution_state"] = "idle"
250-
279+
if metadata and metadata.get("record_timing", False):
280+
if reply_content["status"] == "ok":
281+
time_info["shell.execute_reply"] = execution_end_time
282+
else:
283+
time_info["execution_failed"] = execution_end_time
284+
ycell["metadata"]["execution"] = time_info
285+
# Emit cell execution end event
286+
event_logger.emit(
287+
schema_id="https://events.jupyter.org/jupyter_server_nbmodel/cell_execution/v1",
288+
data={
289+
"event_type": "execution_end",
290+
"cell_id": metadata["cell_id"],
291+
"document_id": metadata["document_id"],
292+
"success": reply_content["status"]=="ok",
293+
"kernel_error": _get_error(outputs),
294+
"timestamp": execution_end_time
295+
}
296+
)
251297
return {
252298
"status": reply_content["status"],
253299
"execution_count": reply_content.get("execution_count"),
@@ -524,9 +570,7 @@ async def post(self, kernel_id: str) -> None:
524570
msg = f"Unknown kernel with id: {kernel_id}"
525571
get_logger().error(msg)
526572
raise tornado.web.HTTPError(status_code=HTTPStatus.NOT_FOUND, reason=msg)
527-
528573
uid = self._execution_stack.put(kernel_id, snippet, metadata)
529-
530574
self.set_status(HTTPStatus.ACCEPTED)
531575
self.set_header("Location", f"/api/kernels/{kernel_id}/requests/{uid}")
532576
self.finish("{}")

jupyter_server_nbmodel/tests/test_handlers.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33
import json
44
import re
5+
import nbformat
56

67
import pytest
78
from jupyter_client.kernelspec import NATIVE_KERNEL_NAME
@@ -149,6 +150,67 @@ async def test_post_erroneous_execute(jp_fetch, pending_kernel_is_ready, snippet
149150
await asyncio.sleep(1)
150151

151152

153+
@pytest.mark.timeout(TEST_TIMEOUT)
154+
async def test_execution_timing_metadata(jp_root_dir, jp_fetch, pending_kernel_is_ready, rtc_create_notebook, jp_serverapp):
155+
snippet = "a = 1"
156+
nb = nbformat.v4.new_notebook(
157+
cells=[nbformat.v4.new_code_cell(source=snippet, execution_count=1)]
158+
)
159+
nb_content = nbformat.writes(nb, version=4)
160+
path, _ = await rtc_create_notebook("test.ipynb", nb_content, store=True)
161+
collaboration = jp_serverapp.web_app.settings["jupyter_server_ydoc"]
162+
fim = jp_serverapp.web_app.settings["file_id_manager"]
163+
document_id = f'json:notebook:{fim.get_id("test.ipynb")}'
164+
cell_id = nb["cells"][0].get("id")
165+
166+
r = await jp_fetch(
167+
"api", "kernels", method="POST", body=json.dumps({"name": NATIVE_KERNEL_NAME})
168+
)
169+
kernel = json.loads(r.body.decode())
170+
await pending_kernel_is_ready(kernel["id"])
171+
172+
response = await wait_for_request(
173+
jp_fetch,
174+
"api",
175+
"kernels",
176+
kernel["id"],
177+
"execute",
178+
method="POST",
179+
body=json.dumps({
180+
"code": snippet,
181+
"metadata": {
182+
"cell_id": cell_id,
183+
"document_id": document_id,
184+
"record_timing": True
185+
}
186+
}),
187+
)
188+
assert response.code == 200
189+
190+
document = await collaboration.get_document(
191+
path=path, content_type="notebook", file_format="json", copy=False
192+
)
193+
cell_data = document.get()["cells"][0]
194+
assert 'execution' in cell_data['metadata'], "'execution' does not exist in 'metadata'"
195+
196+
# Assert that start and end time exist in 'execution'
197+
execution = cell_data['metadata']['execution']
198+
assert 'shell.execute_reply.started' in execution, "'shell.execute_reply.started' does not exist in 'execution'"
199+
assert 'shell.execute_reply' in execution, "'shell.execute_reply' does not exist in 'execution'"
200+
201+
started_time = execution['shell.execute_reply.started']
202+
reply_time = execution['shell.execute_reply']
203+
204+
started_dt = datetime.datetime.fromisoformat(started_time)
205+
reply_dt = datetime.datetime.fromisoformat(reply_time)
206+
207+
# Assert that reply_time is greater than started_time
208+
assert reply_dt > started_dt, "The reply time is not greater than the started time."
209+
response2 = await jp_fetch("api", "kernels", kernel["id"], method="DELETE")
210+
assert response2.code == 204
211+
await asyncio.sleep(1)
212+
213+
152214
@pytest.mark.timeout(TEST_TIMEOUT)
153215
async def test_post_input_execute(jp_fetch, pending_kernel_is_ready):
154216
# Start the first kernel

pyproject.toml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,14 @@ dynamic = ["version", "description", "authors", "urls", "keywords"]
3030

3131
[project.optional-dependencies]
3232
lab = ["jupyterlab>=4.2.0", "jupyter-docprovider>=1.0.0b1", "jupyter-server-ydoc>=1.0.0b1"]
33-
test = ["pytest~=8.2", "pytest-cov", "pytest-jupyter[server]>=0.6", "pytest-timeout"]
33+
test = [
34+
"pytest~=8.2",
35+
"pytest-cov",
36+
"pytest-jupyter[server]>=0.6",
37+
"pytest-timeout",
38+
"jupyter-server-ydoc[test]>=1.0.0b1",
39+
"jupyter-server-fileid"
40+
]
3441
lint = ["mdformat>0.7", "mdformat-gfm>=0.3.5", "ruff>=0.4.0"]
3542
typing = ["mypy>=0.990"]
3643

@@ -74,10 +81,14 @@ build_dir = "jupyter_server_nbmodel/labextension"
7481
[tool.pytest.ini_options]
7582
filterwarnings = [
7683
"error",
84+
"ignore:Unclosed context <zmq.asyncio.Context:ResourceWarning",
85+
"ignore:Unclosed socket <zmq.asyncio.Socket:ResourceWarning",
7786
"ignore:There is no current event loop:DeprecationWarning",
7887
"module:make_current is deprecated:DeprecationWarning",
7988
"module:clear_current is deprecated:DeprecationWarning",
8089
"module:Jupyter is migrating its paths to use standard platformdirs:DeprecationWarning",
90+
# From anyio https://github.com/agronholm/anyio/pull/715
91+
"ignore:Unclosed <MemoryObjectSendStream:ResourceWarning",
8192
]
8293

8394
[tool.mypy]

src/executor.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,17 @@ export class NotebookCellServerExecutor implements INotebookCellExecutor {
108108
const code = cell.model.sharedModel.getSource();
109109
const cellId = cell.model.sharedModel.getId();
110110
const documentId = notebook.sharedModel.getState('document_id');
111+
const { recordTiming } = notebookConfig;
111112

112113
const init = {
113114
method: 'POST',
114115
body: JSON.stringify({
115116
code,
116-
metadata: { cell_id: cellId, document_id: documentId }
117+
metadata: {
118+
cell_id: cellId,
119+
document_id: documentId,
120+
record_timing: recordTiming
121+
}
117122
})
118123
};
119124
onCellExecutionScheduled({ cell });

0 commit comments

Comments
 (0)