Skip to content

Commit 5d9fb3f

Browse files
committed
add function to concatenate HTML files, such as reports
1 parent 7e9d963 commit 5d9fb3f

File tree

4 files changed

+140
-0
lines changed

4 files changed

+140
-0
lines changed
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add functionality to concatenate HTML files, such as saved :class:`mne.Report` files, by `Roy Eric Wieske`_.

mne/report/report.py

+88
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from shutil import copyfile
2424

2525
import numpy as np
26+
from bs4 import BeautifulSoup, Comment, Tag
2627

2728
from .. import __version__ as MNE_VERSION
2829
from .._fiff.meas_info import Info, read_info
@@ -675,6 +676,93 @@ def open_report(fname, **params):
675676
return report
676677

677678

679+
def concatenate_reports(html_files, output_file):
    """Concatenate multiple HTML files into one.

    Each input file is parsed and its table of contents (the first element
    with ``id="toc"``, or else the first element with class ``toc``) is
    dropped. The ``<script>``, ``<link>`` and ``<style>`` tags of every
    ``<head>`` are collected into a single ``<head>``; a tag whose markup is
    identical to one that was already collected (from the same or an earlier
    file) is skipped. The children of every ``<body>`` are moved into their
    own ``<section>`` (preceded by an ``<hr>`` and a ``START <file name>``
    comment) inside a single ``<body>``. ``id`` attributes whose value was
    already used by an earlier element are removed so that IDs stay unique
    in the combined document.

    Parameters
    ----------
    html_files : list of str or Path
        List of paths to the HTML files to be concatenated.
    output_file : str or Path
        Path to the output HTML file.

    Returns
    -------
    final_html : BeautifulSoup
        A BeautifulSoup object representing the combined HTML content.
    """
    combined_head = BeautifulSoup("<head></head>", "lxml").head
    combined_body = BeautifulSoup("<body></body>", "lxml").body
    used_ids = set()
    # Shared across *all* input files: resources repeated in several files
    # must end up in the combined head only once. The serialized tag already
    # contains the tag name, so one set covers scripts, links and styles.
    seen_head_tags = set()

    for file in html_files:
        file = Path(file)
        with open(file, encoding="utf-8") as f:
            soup = BeautifulSoup(f, "lxml")

        toc = soup.find(id="toc") or soup.find(class_="toc")
        if toc is not None:
            toc.decompose()

        # handle head
        if soup.head:
            # find_all returns a snapshot list, so moving tags is safe here
            head_tags = soup.head.find_all(
                ["script", "link", "style"], recursive=True
            )
            for tag in head_tags:
                tag_str = str(tag)
                if tag_str in seen_head_tags:
                    continue
                seen_head_tags.add(tag_str)
                combined_head.append(tag)

        # handle body
        if soup.body:
            section = soup.new_tag("section")
            section.append(soup.new_tag("hr"))
            section.append(Comment(f"START {file.name}"))

            # Iterate over a copy: append() moves the node out of soup.body,
            # and iterating the live ``contents`` list while it shrinks would
            # skip every other child.
            for node in list(soup.body.contents):
                if isinstance(node, Tag):
                    # Check the top-level tag itself as well as its
                    # descendants (find_all only yields descendants).
                    for element in [node, *node.find_all(True)]:
                        id_ = element.get("id")
                        if not id_:
                            continue
                        if id_ in used_ids:
                            del element["id"]
                        else:
                            used_ids.add(id_)

                section.append(node)

            combined_body.append(section)

    # create final HTML
    final_html = BeautifulSoup("<html></html>", "lxml")
    final_html.html.append(combined_head)
    final_html.html.append(combined_body)

    output_file = Path(output_file)
    with output_file.open("w", encoding="utf-8") as f:
        f.write(final_html.prettify())

    return final_html
764+
765+
678766
###############################################################################
679767
# HTML scan renderer
680768

mne/report/tests/test_report.py

+50
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import numpy as np
1515
import pytest
16+
from bs4 import BeautifulSoup
1617
from matplotlib import pyplot as plt
1718

1819
from mne import (
@@ -33,6 +34,7 @@
3334
from mne.report.report import (
3435
_ALLOWED_IMAGE_FORMATS,
3536
CONTENT_ORDER,
37+
concatenate_reports,
3638
)
3739
from mne.utils import Bunch, _record_warnings
3840
from mne.utils._testing import assert_object_equal
@@ -634,6 +636,54 @@ def test_open_report(tmp_path):
634636
assert h5io.read_hdf5(hdf5, title="companion") == "test"
635637

636638

639+
def test_concatenate_reports(tmp_path, sample_meg_dir):
    """Test that concatenate_reports merges two saved reports into one file."""
    raw_path = sample_meg_dir / "sample_audvis_raw.fif"
    raw = read_raw_fif(raw_path, preload=True)
    raw.set_annotations(None)
    raw.crop(0, 20)

    # ``tmp_path`` is used directly as a directory path; wrapping it in a
    # ``with`` statement is unnecessary and relies on pathlib's deprecated
    # path-as-context-manager behavior.
    file1 = tmp_path / "report1.html"
    file2 = tmp_path / "report2.html"
    output_file = tmp_path / "combined.html"

    # Report 1 with custom content
    report1 = Report(title="Report eeg_preprocessing #1")
    report1.add_html(
        "<div class='custom-note'>This is report one</div>", title="Note 1"
    )
    report1.add_raw(raw, title="Raw data", psd=False)
    report1.save(file1, overwrite=True, open_browser=False)

    # Report 2 with different custom content
    report2 = Report(title="Report eeg_preprocessing #2")
    report2.add_html(
        "<div class='custom-note'>This is report two</div>", title="Note 2"
    )
    report2.add_raw(raw, title="Raw data", psd=False)
    report2.save(file2, overwrite=True, open_browser=False)

    combined = concatenate_reports([file1, file2], output_file)

    # The function both returns the combined document and writes it to disk
    assert isinstance(combined, BeautifulSoup)
    assert output_file.exists()

    with open(output_file, encoding="utf-8") as f:
        out_html = BeautifulSoup(f, "lxml")

    assert out_html.head is not None
    assert out_html.body is not None

    # Content from both input reports must be present in the combined file
    for expected in ("This is report one", "This is report two"):
        match = out_html.find(
            string=lambda s, expected=expected: s is not None and expected in s
        )
        assert match is not None
685+
686+
637687
def test_remove():
638688
"""Test removing figures from a report."""
639689
r = Report()

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ full = ["mne[full-no-qt]", "PyQt6 != 6.6.0", "PyQt6-Qt6 != 6.6.0, != 6.7.0"]
8888
# and mne[full-pyside6], which will install PySide6 instead of PyQt6.
8989
full-no-qt = [
9090
"antio >= 0.5.0",
91+
"beautifulsoup4",
9192
"darkdetect",
9293
"defusedxml",
9394
"dipy",

0 commit comments

Comments
 (0)