diff --git a/.github/workflows/markdown-parser.yml b/.github/workflows/markdown-parser.yml
new file mode 100644
index 00000000..e5a3f3ad
--- /dev/null
+++ b/.github/workflows/markdown-parser.yml
@@ -0,0 +1,112 @@
+name: Markdown parser tests
+
+on:
+  pull_request:
+  push:
+    branches: ["master"]
+  workflow_dispatch:
+
+jobs:
+  test:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - name: Checkout benchmarks project
+        uses: actions/checkout@v2
+        with:
+          path: benchmarks
+
+      # - name: Install node
+      #   uses: actions/setup-node@v2
+      #   with:
+      #     node-version: "14.x"
+
+      - name: Install Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Cache pip on Linux
+        uses: actions/cache@v1
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-3.8-${{ hashFiles('**/requirements.txt', 'setup.cfg') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-3.8
+
+      # - name: Get yarn cache directory path
+      #   id: yarn-cache-dir-path
+      #   run: echo "::set-output name=dir::$(yarn cache dir)"
+      # - name: Cache yarn
+      #   uses: actions/cache@v1
+      #   id: yarn-cache # use this to check for `cache-hit` (`steps.yarn-cache.outputs.cache-hit != 'true'`)
+      #   with:
+      #     path: ${{ steps.yarn-cache-dir-path.outputs.dir }}
+      #     key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }}
+      #     restore-keys: |
+      #       ${{ runner.os }}-yarn-
+
+      - name: Install dependencies
+        working-directory: benchmarks/markdown-parser
+        run: |
+          set -ex
+          python -m pip install -r requirements.txt
+          python -m playwright install chromium
+
+      # - name: Checkout JupyterLab
+      #   uses: actions/checkout@v2
+      #   with:
+      #     repository: jupyterlab/jupyterlab
+      #     ref: master
+      #     path: reference
+
+      # - name: Install dependencies
+      #   run: |
+      #     set -ex
+      #     echo "OLD_REF_SHA=$(git log -n1 --format='%H')" >> $GITHUB_ENV
+      #     bash ./scripts/ci_install.sh
+      #     # Build dev mode
+      #     jlpm run build
+      #   working-directory: reference
+
+      - name: Launch JupyterLab
+        shell: bash
+        run: |
+          set -ex
+          python -m jupyterlab --config jupyter_lab_config.py > /tmp/jupyterlab_server.log 2>&1 &
+        working-directory: benchmarks/markdown-parser
+
+      - name: Wait for JupyterLab
+        uses: ifaxity/wait-on-action@v1
+        with:
+          resource: http-get://localhost:9999/lab
+          timeout: 360000
+
+      - name: Tests
+        working-directory: benchmarks/markdown-parser
+        run: |
+          set -ex
+          mkdir -p reports
+          # For now limit to JupyterLab vs GFM
+          pytest -rap -vv --rootdir "$PWD" --base-url "http://localhost:9999" --report-dir "$PWD/reports" tests/test_jupyterlab.py
+
+      - name: Upload pytest reports
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: markdown-parser-report-${{ github.run_number }}
+          path: |
+            benchmarks/markdown-parser/reports
+
+      - name: Kill the server
+        if: always()
+        shell: bash
+        run: |
+          kill -s SIGKILL $(pgrep jupyter-lab)
+
+      - name: Print JupyterLab logs
+        if: always()
+        run: |
+          echo "::group::Server log"
+          cat /tmp/jupyterlab_server.log
+          echo "::endgroup::"
diff --git a/markdown-parser/README.md b/markdown-parser/README.md
new file mode 100644
index 00000000..d671d9c9
--- /dev/null
+++ b/markdown-parser/README.md
@@ -0,0 +1,18 @@
+# Markdown parser tests
+
+This folder contains tests to validate Markdown parsers.
+
+The tests are written with [pytest](https://docs.pytest.org). To test web frontend parsers, [Playwright](https://playwright.dev/python/docs/intro) is used to evaluate the Markdown-to-HTML conversion in a real browser.
+
+> The test database is downloaded from [cmark-gfm](https://github.com/github/cmark-gfm), the GitHub Flavored Markdown fork of the CommonMark reference parser.
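+
+A local run should look roughly like the CI workflow (the port and the `reports` directory below match `jupyter_lab_config.py` and `.github/workflows/markdown-parser.yml`):
+
+```bash
+python -m pip install -r requirements.txt
+python -m playwright install chromium
+python -m jupyterlab --config jupyter_lab_config.py &
+mkdir -p reports
+pytest -vv --base-url "http://localhost:9999" --report-dir "$PWD/reports" tests/
+```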
+
diff --git a/markdown-parser/conftest.py b/markdown-parser/conftest.py
new file mode 100644
index 00000000..d699ea1c
--- /dev/null
+++ b/markdown-parser/conftest.py
@@ -0,0 +1,6 @@
+def pytest_addoption(parser):
+    """Add a command line option setting where the comparison reports are saved."""
+    parser.addoption(
+        "--report-dir",
+        help="Directory in which the reports must be saved.",
+    )
diff --git a/markdown-parser/jupyter_lab_config.py b/markdown-parser/jupyter_lab_config.py
new file mode 100644
index 00000000..d7e46661
--- /dev/null
+++ b/markdown-parser/jupyter_lab_config.py
@@ -0,0 +1,21 @@
+import getpass
+from tempfile import mkdtemp
+
+# Check whether we are running inside a Docker container (Jupyter Docker stacks use the jovyan user)
+if getpass.getuser() == "jovyan":
+    c.ServerApp.ip = "0.0.0.0"
+
+c.ServerApp.port = 9999
+c.ServerApp.open_browser = False
+
+c.ServerApp.root_dir = mkdtemp(prefix="markdown-parser-lab-")
+
+c.ServerApp.token = ""
+c.ServerApp.password = ""
+c.ServerApp.disable_check_xsrf = True
+
+# c.LabApp.dev_mode = True
+# c.LabApp.extensions_in_dev_mode = True
+c.LabApp.expose_app_in_browser = True
+
+c.RetroApp.expose_app_in_browser = True
diff --git a/markdown-parser/requirements.txt b/markdown-parser/requirements.txt
new file mode 100644
index 00000000..a5e1e7c9
--- /dev/null
+++ b/markdown-parser/requirements.txt
@@ -0,0 +1,6 @@
+jupyterlab[test]
+nbconvert
+playwright
+pytest
+pytest-playwright
+requests
diff --git a/markdown-parser/tests/__init__.py b/markdown-parser/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/markdown-parser/tests/conftest.py b/markdown-parser/tests/conftest.py
new file mode 100644
index 00000000..76ead114
--- /dev/null
+++ b/markdown-parser/tests/conftest.py
@@ -0,0 +1,118 @@
+import os
+import pathlib
+import pytest
+from playwright.sync_api import Error
+from slugify import slugify
+
+
+def _build_artifact_test_folder(pytestconfig, request, folder_or_file_name):
+    # Taken from pytest-playwright
+    output_dir = pytestconfig.getoption("--output")
+    return os.path.join(output_dir, slugify(request.node.nodeid), folder_or_file_name)
+
+
+@pytest.fixture(scope="session")
+def jupyterlab_page(browser, browser_context_args, pytestconfig, request):
+    # Session-scoped merge of the pytest-playwright context and page fixtures that loads the JupyterLab page once
+    pages = []
+    context = browser.new_context(**browser_context_args)
+    context.on("page", lambda page: pages.append(page))
+
+    tracing_option = pytestconfig.getoption("--tracing")
+    capture_trace = tracing_option in ["on", "retain-on-failure"]
+    if capture_trace:
+        context.tracing.start(
+            name=slugify(request.node.nodeid),
+            screenshots=True,
+            snapshots=True,
+            sources=True,
+        )
+
+    page = context.new_page()
+    page.goto("/lab")
+    yield page
+
+    # If request.node is missing rep_call, then some error happened during execution
+    # that prevented teardown, but should still be counted as a failure
+    failed = request.node.rep_call.failed if hasattr(request.node, "rep_call") else True
+
+    if capture_trace:
+        retain_trace = tracing_option == "on" or (
+            failed and tracing_option == "retain-on-failure"
+        )
+        if retain_trace:
+            trace_path = _build_artifact_test_folder(pytestconfig, request, "trace.zip")
+            context.tracing.stop(path=trace_path)
+        else:
+            context.tracing.stop()
+
+    screenshot_option = pytestconfig.getoption("--screenshot")
+    capture_screenshot = screenshot_option == "on" or (
+        failed and screenshot_option == "only-on-failure"
+    )
+    if capture_screenshot:
+        for index, page in enumerate(pages):
+            human_readable_status = "failed" if failed else "finished"
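+            # One screenshot per open page, named after the outcome so failures stand out in the CI artifacts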
+            screenshot_path = _build_artifact_test_folder(
+                pytestconfig, request, f"test-{human_readable_status}-{index+1}.png"
+            )
+            try:
+                page.screenshot(timeout=5000, path=screenshot_path)
+            except Error:
+                pass
+
+    context.close()
+
+    video_option = pytestconfig.getoption("--video")
+    preserve_video = video_option == "on" or (
+        failed and video_option == "retain-on-failure"
+    )
+    if preserve_video:
+        for page in pages:
+            video = page.video
+            if not video:
+                continue
+            try:
+                video_path = video.path()
+                file_name = os.path.basename(video_path)
+                video.save_as(
+                    path=_build_artifact_test_folder(pytestconfig, request, file_name)
+                )
+            except Error:
+                # Silently ignore empty videos.
+                pass
+
+
+@pytest.fixture(scope="module")
+def md_report(request):
+    """Generate a comparison report for each test module.
+
+    Each test must append a dictionary with the same keys to this list. Each key
+    becomes a table header and each test becomes a table row.
+    """
+    test_reports = []
+
+    yield test_reports
+
+    if test_reports and len(test_reports[0]) > 0:
+        filename = pathlib.Path(request.config.getoption("report_dir")) / (
+            request.module.__name__.replace(".", "_") + "_report.md"
+        )
+
+        with filename.open("w") as f:
+            headers = test_reports[0]
+            f.writelines(
+                [
+                    f"# {request.module.__name__}\n",
+                    "\n",
+                    "| " + " | ".join(headers) + " |\n",
+                    "| " + " | ".join(["---"] * len(headers)) + " |\n",
+                ]
+            )
+            f.writelines(
+                map(
+                    lambda e: "| " + " | ".join(map(str, e.values())) + " |\n",
+                    test_reports,
+                )
+            )
diff --git a/markdown-parser/tests/test_jupyter.py b/markdown-parser/tests/test_jupyter.py
new file mode 100644
index 00000000..d72b5924
--- /dev/null
+++ b/markdown-parser/tests/test_jupyter.py
@@ -0,0 +1,32 @@
+import pytest
+from nbconvert.filters.markdown import markdown2html_mistune
+
+from .utils import commonmark_gfm_tests, get_jupyterlab_rendered_markdown
+
+
+@pytest.mark.parametrize("gfm", commonmark_gfm_tests())
+def test_nbconvert_jupyterlab(jupyterlab_page, md_report, gfm):
+
+    # normalize comes from github/cmark-gfm; commonmark_gfm_tests() imported it and left it cached in sys.modules
+    from normalize import normalize_html
+
+    given = gfm["markdown"]
+    test = normalize_html(markdown2html_mistune(given))
+    ref = normalize_html(jupyterlab_page.evaluate(get_jupyterlab_rendered_markdown, given))
+
+    success = True
+    try:
+        assert test == ref
+    except Exception:
+        success = False
+        raise
+    finally:
+        md_report.append({
+            "id": gfm["example"],
+            "section": gfm["section"],
+            "failed": "" if success else "X",
+            "markdown": repr(given).replace("'", "`"),
+            "JupyterLab": repr(ref).replace("'", "`"),
+            "nbconvert - mistune": repr(test).replace("'", "`"),
+            "comments": ""
+        })
diff --git a/markdown-parser/tests/test_jupyterlab.py b/markdown-parser/tests/test_jupyterlab.py
new file mode 100644
index 00000000..7f94cbb4
--- /dev/null
+++ b/markdown-parser/tests/test_jupyterlab.py
@@ -0,0 +1,34 @@
+import pytest
+
+from .utils import commonmark_gfm_tests, get_jupyterlab_rendered_markdown
+
+
+@pytest.mark.parametrize("gfm", commonmark_gfm_tests())
+def test_gfm_jupyterlab_renderer(jupyterlab_page, md_report, gfm):
+
+    # normalize comes from github/cmark-gfm; commonmark_gfm_tests() imported it and left it cached in sys.modules
+    from normalize import normalize_html
+
+    given = gfm["markdown"]
+    test = normalize_html(jupyterlab_page.evaluate(get_jupyterlab_rendered_markdown, given))
+    ref = normalize_html(gfm["html"])
+
+    success = True
+    try:
+        assert test == ref
+    except Exception:
+        success = False
+        raise
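+        # Re-raise so pytest reports the failure; the report row is still appended in the finally block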
+    finally:
+        md_report.append(
+            {
+                "id": gfm["example"],
+                "section": gfm["section"],
+                "failed": "" if success else "X",
+                "markdown": repr(given).replace("'", "`"),
+                "commonmark-gfm": repr(ref).replace("'", "`"),
+                "JupyterLab": repr(test).replace("'", "`"),
+                "comments": "",
+            }
+        )
diff --git a/markdown-parser/tests/test_nbconvert.py b/markdown-parser/tests/test_nbconvert.py
new file mode 100644
index 00000000..205588a2
--- /dev/null
+++ b/markdown-parser/tests/test_nbconvert.py
@@ -0,0 +1,34 @@
+import pytest
+from nbconvert.filters.markdown import markdown2html_mistune
+
+from .utils import commonmark_gfm_tests
+
+
+@pytest.mark.parametrize("gfm", commonmark_gfm_tests())
+def test_gfm_nbconvert_markdown2html(md_report, gfm):
+
+    # normalize comes from github/cmark-gfm; commonmark_gfm_tests() imported it and left it cached in sys.modules
+    from normalize import normalize_html
+
+    given = gfm["markdown"]
+    test = normalize_html(markdown2html_mistune(given))
+    ref = normalize_html(gfm["html"])
+
+    success = True
+    try:
+        assert test == ref
+    except Exception:
+        success = False
+        raise
+    finally:
+        md_report.append(
+            {
+                "id": gfm["example"],
+                "section": gfm["section"],
+                "failed": "" if success else "X",
+                "markdown": repr(given).replace("'", "`"),
+                "commonmark-gfm": repr(ref).replace("'", "`"),
+                "nbconvert - mistune": repr(test).replace("'", "`"),
+                "comments": "",
+            }
+        )
diff --git a/markdown-parser/tests/utils.py b/markdown-parser/tests/utils.py
new file mode 100644
index 00000000..96c9519a
--- /dev/null
+++ b/markdown-parser/tests/utils.py
@@ -0,0 +1,66 @@
+import functools
+from pathlib import Path
+import sys
+import tempfile
+import zipfile
+import requests
+
+# GitHub Flavored Markdown fork of the CommonMark reference parser
+COMMONMARK_GFM_URL = "https://github.com/github/cmark-gfm/archive/refs/heads/master.zip"
+SPEC_EXTRACTOR = "cmark-gfm-master/test/spec_tests.py"
+SPEC_FILES = ["spec.txt"]
+
+
+@functools.lru_cache(1)
+def commonmark_gfm_tests():
+    tests = []
+    r = requests.get(COMMONMARK_GFM_URL)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        with tempfile.NamedTemporaryFile(mode="wb", suffix=".zip") as tfile:
+            tfile.write(r.content)
+            tfile.flush()  # make sure the archive is fully on disk before reopening it by name
+
+            with zipfile.ZipFile(tfile.name, mode="r") as fzip:
+                fzip.extractall(tmpdir)
+
+        tmp_path = Path(tmpdir)
+        test_folder = (tmp_path / SPEC_EXTRACTOR).parent
+
+        sys.path.insert(0, str(test_folder))
+
+        from spec_tests import get_tests
+        from normalize import normalize_html  # noqa: F401 -- cached in sys.modules for the test modules
+
+        for testfile in SPEC_FILES:
+            tests.extend(get_tests(str(test_folder / testfile)))
+
+        sys.path.remove(str(test_folder))
+
+    return tests
+
+# JavaScript function evaluated in the browser to render a Markdown string `md`
+# to HTML with the JupyterLab rendermime parser.
+get_jupyterlab_rendered_markdown = """async (md) => {
+  const app = window.jupyterlab ?? window.jupyterapp;
+
+  const pluginId = '@jupyterlab/rendermime-extension:plugin';
+
+  const plugin = app._pluginMap[pluginId];
+
+  if (!plugin.activated) {
+    await app.activatePlugin(pluginId);
+  }
+
+  const renderer = plugin.service.createRenderer('text/markdown');
+  await renderer.renderModel(
+    plugin.service.createModel({ data: { 'text/markdown': md } })
+  );
+
+  return renderer.node.innerHTML;
+}"""
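+
+# Example usage from a test, given a Playwright page with JupyterLab loaded
+# (e.g. the jupyterlab_page fixture from tests/conftest.py):
+#
+#     html = jupyterlab_page.evaluate(get_jupyterlab_rendered_markdown, "**hello**")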