From b4f52c2a1d1f755e795985d10ba9a6f687d6f10d Mon Sep 17 00:00:00 2001 From: geopanther Date: Wed, 13 May 2026 17:10:37 +0200 Subject: [PATCH 1/2] feat(document): use matching markdown file as folder page content When a markdown file shares its stem with a sibling directory (e.g., sub-folder.md next to sub-folder/), use its content as the body of the folder page instead of creating an empty folder page and a separate content page. The markdown file is excluded from normal page processing to avoid duplication. Adapted from iamjackg/md2cf#141 by @sheyifan. --- mdfluence/document.py | 58 +++++++++++++++++++++++++++++- test_package/unit/test_document.py | 21 +++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/mdfluence/document.py b/mdfluence/document.py index 47ed981..cefeb31 100644 --- a/mdfluence/document.py +++ b/mdfluence/document.py @@ -155,6 +155,22 @@ def get_pages_from_directory( folder_data = dict() git_repo = GitRepository(file_path, use_gitignore=use_gitignore) + # First pass: identify markdown files used as folder content + # (a file like `subdir.md` next to a directory `subdir/` provides + # content for the folder page instead of being a standalone page) + files_used_as_folder_content: set[Path] = set() + for current_path, directories, file_names in os.walk(file_path): + current_path = Path(current_path).resolve() + if git_repo.is_ignored(current_path): + continue + for subdir in directories: + subdir_path = current_path / subdir + if git_repo.is_ignored(subdir_path): + continue + potential_file = current_path / f"{subdir}.md" + if potential_file.exists() and not git_repo.is_ignored(potential_file): + files_used_as_folder_content.add(potential_file.resolve()) + for current_path, directories, file_names in os.walk(file_path): current_path = Path(current_path).resolve() @@ -227,6 +243,39 @@ def get_pages_from_directory( folder_data[current_path]["title"] = folder_title + # Check for a matching markdown file that provides folder page 
content + folder_content_file = None + if current_path != base_path: + potential_file = current_path.parent / f"{current_path.name}.md" + if potential_file.resolve() in files_used_as_folder_content: + folder_content_file = potential_file + + folder_page_body = "" + folder_page_file_path = None + folder_page_attachments: list[Path] = [] + folder_page_relative_links: list[RelativeLink] = [] + + if folder_content_file is not None: + content_page = get_page_data_from_file_path( + folder_content_file, + strip_header=strip_header, + remove_text_newlines=remove_text_newlines, + enable_relative_links=enable_relative_links, + enable_emoji=enable_emoji, + convert_anchors=convert_anchors, + render_diagrams=render_diagrams, + mmdc_path=mmdc_path, + plantuml_path=plantuml_path, + ) + folder_page_body = content_page.body + folder_page_file_path = content_page.file_path + folder_page_attachments = content_page.attachments + folder_page_relative_links = content_page.relative_links + if content_page.title: + folder_title = content_page.title + parent_page_title = content_page.title + folder_data[current_path]["title"] = folder_title + if folder_title is not None and ( markdown_files or (directories and not skip_empty and not collapse_empty) ): @@ -234,11 +283,18 @@ def get_pages_from_directory( Page( title=folder_title, parent_title=folder_parent_title, - body="", + body=folder_page_body, + file_path=folder_page_file_path, + attachments=folder_page_attachments, + relative_links=folder_page_relative_links, ) ) for markdown_file in markdown_files: + # Skip files already used as folder content + if markdown_file.resolve() in files_used_as_folder_content: + continue + processed_page = get_page_data_from_file_path( markdown_file, strip_header=strip_header, diff --git a/test_package/unit/test_document.py b/test_package/unit/test_document.py index 70e479b..7aa63ac 100644 --- a/test_package/unit/test_document.py +++ b/test_package/unit/test_document.py @@ -227,6 +227,27 @@ def 
test_get_pages_from_directory_with_pages_file_multi_level(fs): ] +def test_get_pages_from_directory_folder_content_from_matching_md(fs): + fs.create_file( + "/root-folder/sub-folder.md", contents="# Sub Folder Intro\n\nWelcome!" + ) + fs.create_file("/root-folder/sub-folder/child-page.md", contents="# Child") + + result = doc.get_pages_from_directory(Path("/root-folder")) + assert result == [ + FakePage( + title="Sub Folder Intro", + body="

<h1>Sub Folder Intro</h1>\n<p>Welcome!</p>

\n", + file_path=Path("/root-folder/sub-folder.md"), + ), + FakePage( + title="Child", + file_path=Path("/root-folder/sub-folder/child-page.md"), + parent_title="Sub Folder Intro", + ), + ] + + def test_get_pages_from_directory_with_pages_file_single_level(fs): fs.create_file("/root-folder/some-page.md") fs.create_file("/root-folder/.pages", contents='title: "Root folder"') From 81b04cdc719e514f2db87b74798ff37902bf0d14 Mon Sep 17 00:00:00 2001 From: geopanther Date: Wed, 13 May 2026 18:21:05 +0200 Subject: [PATCH 2/2] feat(document): use matching markdown file as folder page content When a markdown file shares its stem with a sibling directory (e.g., sub-folder.md next to sub-folder/), use its content as the body of the folder page instead of creating an empty folder page and a separate content page. Opt-in via --use-folder-content-file flag. Labels from the matching file's front matter are passed through to the folder page. Adapted from iamjackg/md2cf#141 by @sheyifan. --- mdfluence/__main__.py | 7 +++++++ mdfluence/document.py | 33 +++++++++++++++++------------- test_package/unit/test_document.py | 18 +++++++++++++++- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/mdfluence/__main__.py b/mdfluence/__main__.py index 3c41cc5..eebee26 100644 --- a/mdfluence/__main__.py +++ b/mdfluence/__main__.py @@ -199,6 +199,12 @@ def get_parser(): help='use the "title" entry in YAML files called .pages in each ' "directory to change the folder name", ) + dir_group.add_argument( + "--use-folder-content-file", + action="store_true", + help="when a markdown file shares its name with a sibling directory " + "(e.g. 
sub-folder.md next to sub-folder/), use it as the folder page content", + ) empty_group = dir_group.add_mutually_exclusive_group() empty_group.add_argument( @@ -741,6 +747,7 @@ def collect_pages_to_upload(args): render_diagrams=args.render_diagrams, mmdc_path=args.mmdc_path, plantuml_path=args.plantuml_path, + use_folder_content_file=args.use_folder_content_file, ) else: try: diff --git a/mdfluence/document.py b/mdfluence/document.py index cefeb31..6f26359 100644 --- a/mdfluence/document.py +++ b/mdfluence/document.py @@ -130,6 +130,7 @@ def get_pages_from_directory( render_diagrams: bool = False, mmdc_path: str | None = None, plantuml_path: str | None = None, + use_folder_content_file: bool = False, ) -> List[Page]: """ Collect a list of markdown files recursively under the file_path directory. @@ -148,6 +149,8 @@ def get_pages_from_directory( placeholders :param skip_subtrees_wo_markdown: skip directory subtrees that contain no markdown files + :param use_folder_content_file: when a markdown file shares its stem with a sibling + directory (e.g. subdir.md next to subdir/), use its content as the folder page body :return: A list of paths to the markdown files to upload. 
""" processed_pages = list() @@ -155,21 +158,20 @@ def get_pages_from_directory( folder_data = dict() git_repo = GitRepository(file_path, use_gitignore=use_gitignore) - # First pass: identify markdown files used as folder content - # (a file like `subdir.md` next to a directory `subdir/` provides - # content for the folder page instead of being a standalone page) + # Pre-scan: identify markdown files used as folder content (single walk) files_used_as_folder_content: set[Path] = set() - for current_path, directories, file_names in os.walk(file_path): - current_path = Path(current_path).resolve() - if git_repo.is_ignored(current_path): - continue - for subdir in directories: - subdir_path = current_path / subdir - if git_repo.is_ignored(subdir_path): + if use_folder_content_file: + for current_path, directories, file_names in os.walk(file_path): + current_path = Path(current_path).resolve() + if git_repo.is_ignored(current_path): continue - potential_file = current_path / f"{subdir}.md" - if potential_file.exists() and not git_repo.is_ignored(potential_file): - files_used_as_folder_content.add(potential_file.resolve()) + for subdir in directories: + subdir_path = current_path / subdir + if git_repo.is_ignored(subdir_path): + continue + potential_file = current_path / f"{subdir}.md" + if potential_file.exists() and not git_repo.is_ignored(potential_file): + files_used_as_folder_content.add(potential_file.resolve()) for current_path, directories, file_names in os.walk(file_path): current_path = Path(current_path).resolve() @@ -245,7 +247,7 @@ def get_pages_from_directory( # Check for a matching markdown file that provides folder page content folder_content_file = None - if current_path != base_path: + if use_folder_content_file and current_path != base_path: potential_file = current_path.parent / f"{current_path.name}.md" if potential_file.resolve() in files_used_as_folder_content: folder_content_file = potential_file @@ -254,6 +256,7 @@ def get_pages_from_directory( 
folder_page_file_path = None folder_page_attachments: list[Path] = [] folder_page_relative_links: list[RelativeLink] = [] + folder_page_labels: list[str] | None = None if folder_content_file is not None: content_page = get_page_data_from_file_path( @@ -271,6 +274,7 @@ def get_pages_from_directory( folder_page_file_path = content_page.file_path folder_page_attachments = content_page.attachments folder_page_relative_links = content_page.relative_links + folder_page_labels = content_page.labels if content_page.title: folder_title = content_page.title parent_page_title = content_page.title @@ -287,6 +291,7 @@ def get_pages_from_directory( file_path=folder_page_file_path, attachments=folder_page_attachments, relative_links=folder_page_relative_links, + labels=folder_page_labels, ) ) diff --git a/test_package/unit/test_document.py b/test_package/unit/test_document.py index 7aa63ac..eeb9240 100644 --- a/test_package/unit/test_document.py +++ b/test_package/unit/test_document.py @@ -233,7 +233,9 @@ def test_get_pages_from_directory_folder_content_from_matching_md(fs): ) fs.create_file("/root-folder/sub-folder/child-page.md", contents="# Child") - result = doc.get_pages_from_directory(Path("/root-folder")) + result = doc.get_pages_from_directory( + Path("/root-folder"), use_folder_content_file=True + ) assert result == [ FakePage( title="Sub Folder Intro", @@ -248,6 +250,20 @@ def test_get_pages_from_directory_folder_content_from_matching_md(fs): ] +def test_get_pages_from_directory_folder_content_disabled_by_default(fs): + """Without --use-folder-content-file, matching .md is treated as normal page.""" + fs.create_file( + "/root-folder/sub-folder.md", contents="# Sub Folder Intro\n\nWelcome!" 
+ ) + fs.create_file("/root-folder/sub-folder/child-page.md", contents="# Child") + + result = doc.get_pages_from_directory(Path("/root-folder")) + # sub-folder.md should appear as a standalone page, folder page should be empty + titles = [p.title for p in result] + assert "Sub Folder Intro" in titles + assert "sub-folder" in titles # folder page uses dir name + + def test_get_pages_from_directory_with_pages_file_single_level(fs): fs.create_file("/root-folder/some-page.md") fs.create_file("/root-folder/.pages", contents='title: "Root folder"')