Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion src/Elastic.Markdown/DocumentationGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.IO.Abstractions;
using System.Text.Json;
using System.Text.RegularExpressions;
using Elastic.Documentation;
using Elastic.Documentation.Configuration;
using Elastic.Documentation.Configuration.LegacyUrlMappings;
Expand Down Expand Up @@ -37,7 +38,7 @@ public record GenerationResult
public IReadOnlyDictionary<string, LinkRedirect> Redirects { get; set; } = new Dictionary<string, LinkRedirect>();
}

public class DocumentationGenerator
public partial class DocumentationGenerator
{
private readonly IDocumentationFileOutputProvider? _documentationFileOutputProvider;
private readonly IConversionCollector? _conversionCollector;
Expand Down Expand Up @@ -231,6 +232,29 @@ private async Task ExtractEmbeddedStaticResources(Cancel ctx)
}
}

/// <summary>
/// Source-generated pattern applied to the whole relative path.
/// Accepts any run of lowercase ASCII letters, digits, whitespace, hyphens, underscores,
/// dots, forward slashes and backslashes, followed by one lowercase letter, digit,
/// underscore or hyphen, then a dot and an extension of one or more lowercase letters
/// (captured as group 1). Uppercase characters anywhere in the path cause a mismatch.
/// </summary>
[GeneratedRegex(@"^[a-z0-9\s\-_\.\/\\]*[a-z0-9_\-]\.([a-z]+)$")]
private static partial Regex FilePathRegex();

/// <summary>
/// Source-generated pattern applied to the file name only (no directory part).
/// The first character must be a lowercase ASCII letter, digit or underscore, so names
/// starting with a hyphen, dot or whitespace are rejected. The remainder may contain
/// lowercase letters, digits, underscores, hyphens, whitespace and dots, and must end in
/// a dot followed by an extension of one or more lowercase letters (captured as group 1).
/// </summary>
[GeneratedRegex(@"^[a-z0-9_][a-z0-9_\-\s\.]*?\.([a-z]+)$")]
private static partial Regex FileNameRegex();

/// <summary>
/// Determines whether a relative output path is an acceptable documentation file name.
/// </summary>
/// <param name="strToCheck">The path to validate, relative to the output directory.</param>
/// <returns>
/// <c>true</c> when the path is one of the grandfathered exceptions below, or when the full
/// path matches <c>FilePathRegex</c> and its file name matches <c>FileNameRegex</c>
/// (lowercase letters, digits, spaces, dashes, dots and underscores); otherwise <c>false</c>.
/// </returns>
public static bool IsValidFileName(string strToCheck) =>
    strToCheck switch
    {
        // Prior art: content that predates this validation and is accepted as-is.
        // Ordinal comparisons are used because these are path literals, not linguistic text.
        _ when strToCheck.StartsWith("release-notes/elastic-agent/_snippets/", StringComparison.Ordinal) => true,
        _ when strToCheck.StartsWith("reference/query-languages/esql/_snippets/", StringComparison.Ordinal) => true,
        _ when strToCheck.EndsWith(".svg", StringComparison.Ordinal) => true,
        _ when strToCheck.EndsWith(".gif", StringComparison.Ordinal) => true,
        _ when strToCheck.EndsWith(".png", StringComparison.Ordinal) => true,
        "reference/security/prebuilt-rules/audit_policies/windows/README.md" => true,
        // The fleet UI workflow page is listed twice on purpose: the global site build sees
        // the prefixed path, while isolated (per-repository, pull request) builds see the
        // local path. Listing only the prefixed path broke isolated builds.
        "extend/integrations/developer-workflow-fleet-UI.md" => true,
        "extend/developer-workflow-fleet-UI.md" => true,
        "reference/elasticsearch/clients/ruby/Helpers.md" => true,
        "explore-analyze/ai-features/llm-guides/connect-to-vLLM.md" => true,
        // Everything else must satisfy both the whole-path and the file-name pattern.
        _ => FilePathRegex().IsMatch(strToCheck) && FileNameRegex().IsMatch(Path.GetFileName(strToCheck))
    };

private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile file, DateTimeOffset outputSeenChanges, Cancel ctx)
{
if (!Context.Force)
Expand All @@ -243,8 +267,16 @@ private async Task ProcessFile(HashSet<string> offendingFiles, DocumentationFile

_logger.LogTrace("--> {FileFullPath}", file.SourceFile.FullName);
var outputFile = OutputFile(file.RelativePath);

if (outputFile is not null)
{
var relative = Path.GetRelativePath(Context.OutputDirectory.FullName, outputFile.FullName);
if (!IsValidFileName(relative))
{
Context.Collector.EmitError(file.SourceFile.FullName, $"File name {relative} is not valid needs to be lowercase and contain only alphanumeric characters, spaces, dashes, dots and underscores");
return;
}

var context = new ProcessingFileContext
{
BuildContext = Context,
Expand Down Expand Up @@ -368,4 +400,5 @@ public async Task<RenderResult> RenderLayout(MarkdownFile markdown, Cancel ctx)
await DocumentationSet.ResolveDirectoryTree(ctx);
return await HtmlWriter.RenderLayout(markdown, ctx);
}

}
270 changes: 270 additions & 0 deletions tests/Elastic.Markdown.Tests/OutputDirectoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,276 @@ public async Task CreatesDefaultOutputDirectory()
await collector.StopAsync(TestContext.Current.CancellationToken);

fileSystem.Directory.Exists(".artifacts").Should().BeTrue();
}

/// <summary>
/// Every entry of <see cref="ValidFileNames"/> must be accepted by
/// <c>DocumentationGenerator.IsValidFileName</c>.
/// </summary>
[Theory]
[MemberData(nameof(ValidFileNames))]
public void OutputFileValidationValidNames(string fileName) =>
    DocumentationGenerator
        .IsValidFileName(fileName)
        .Should()
        .BeTrue($"'{fileName}' should be a valid filename");

/// <summary>
/// Every entry of <see cref="InvalidFileNames"/> must be rejected by
/// <c>DocumentationGenerator.IsValidFileName</c>.
/// </summary>
[Theory]
[MemberData(nameof(InvalidFileNames))]
public void OutputFileValidationInvalidNames(string fileName) =>
    DocumentationGenerator
        .IsValidFileName(fileName)
        .Should()
        .BeFalse($"'{fileName}' should be an invalid filename");

/// <summary>
/// Paths that <c>DocumentationGenerator.IsValidFileName</c> is expected to accept:
/// names matching the lowercase naming rules plus the explicitly grandfathered exceptions.
/// </summary>
public static TheoryData<string> ValidFileNames =>
[
    "test.md",
    "file.txt",
    "index.html",
    "readme.rst",

    // With numbers
    "test123.md",
    "123test.md",
    "file2.md",
    "99bottles.md",

    // With underscores
    "test_file.md",
    "my_long_file_name.md",
    "_leading_underscore.md",
    "trailing_underscore_.md",

    // With hyphens
    "test-file.md",
    "my-long-file-name.md",
    "trailing-hyphen-.md",

    // Combined underscores and hyphens
    "test_file-name.md",
    "my-file_name.md",

    // With dots in filename (before extension)
    "test.config.md",
    "file.test.backup.md",
    "v1.0.0.md",

    // With spaces (allowed per regex)
    "test file.md",
    "my document.md",

    // Paths with all lowercase directories
    "path/to/file.md",
    "deep/nested/path/to/file.md",
    "folder/subfolder/document.md",

    // Paths with numbers
    "path123/file.md",
    "v1/docs/guide.md",

    // Paths with underscores and hyphens
    "my_folder/file.md",
    "my-folder/file.md",
    "path_to/sub-folder/file.md",

    // SVG files exception (even with uppercase - per the .EndsWith checks)
    "image.svg",
    "Icon.svg",
    "LOGO.svg",
    "path/to/Image.svg",

    // PNG files exception
    "image.png",
    "Screenshot.png",
    "IMAGE.png",
    "path/to/Logo.png",

    // GIF files exception
    "animation.gif",
    "Loading.gif",
    "SPINNER.gif",

    // ESQL snippets exception (prior art)
    "reference/query-languages/esql/_snippets/functions/examples/cbrt.md",
    "reference/query-languages/esql/_snippets/anything/here/File.md",
    "reference/query-languages/esql/_snippets/UPPERCASE.md",

    // Elastic Agent release-notes snippets exception (prior art)
    "release-notes/elastic-agent/_snippets/index.md",
    "release-notes/elastic-agent/_snippets/9.0.0/Known-Issues.md",

    // Hardcoded exceptions
    "reference/security/prebuilt-rules/audit_policies/windows/README.md",
    "extend/integrations/developer-workflow-fleet-UI.md",
    "reference/elasticsearch/clients/ruby/Helpers.md",
    "explore-analyze/ai-features/llm-guides/connect-to-vLLM.md"
];

/// <summary>
/// Paths that <c>DocumentationGenerator.IsValidFileName</c> is expected to reject:
/// uppercase characters, disallowed leading characters, special or non-ASCII characters,
/// and missing or non-alphabetic extensions.
/// </summary>
public static TheoryData<string> InvalidFileNames =>
[
    "Test.md",
    "FILE.md",
    "MyFile.md",
    "testFile.md",
    "README.md",

    // Uppercase in extension
    "test.MD",
    "test.Md",
    "file.TXT",
    "document.Html",

    // Uppercase in directory path
    "Path/file.md",
    "path/To/file.md",
    "FOLDER/file.md",
    "docs/MyFolder/file.md",

    // Filenames starting with invalid characters (must start with [a-z0-9_])
    "-leading-hyphen.md",
    "-file.md",
    ".hidden.md",
    " leading-space.md",
    "path/to/-invalid.md",
    "path/to/.hidden.md",
    "path/to/ space.md",

    // Special characters - parentheses
    "test(1).md",
    "file (copy).md",
    "document(v2).md",

    // Special characters - square brackets
    "test[1].md",
    "file[copy].md",

    // Special characters - curly braces
    "test{1}.md",

    // Special characters - exclamation mark
    "test!.md",
    "important!file.md",

    // Special characters - at sign
    // (two distinct names; identical entries would be duplicate theory rows)
    "test@file.md",
    "user@domain.md",

    // Special characters - hash
    "test#1.md",
    "file#.md",

    // Special characters - dollar sign
    "test$file.md",
    "price$.md",

    // Special characters - percent
    "test%file.md",
    "100%done.md",

    // Special characters - caret
    "test^file.md",

    // Special characters - ampersand
    "test&file.md",
    "this&that.md",

    // Special characters - asterisk
    "test*file.md",
    "*.md",

    // Special characters - plus sign
    "test+file.md",
    "c++.md",

    // Special characters - equals sign
    "test=file.md",

    // Special characters - pipe
    "test|file.md",

    // Special characters - less than / greater than
    "test<file>.md",

    // Special characters - colon
    "test:file.md",

    // Special characters - semicolon
    "test;file.md",

    // Special characters - single quote
    "test'file.md",
    "it's.md",

    // Special characters - double quote
    "test\"file.md",

    // Special characters - backtick
    "test`file.md",

    // Special characters - tilde
    "test~file.md",
    "~temp.md",

    // Special characters - comma
    "test,file.md",
    "a,b,c.md",

    // Special characters - question mark
    "test?.md",
    "what?.md",

    // No extension
    "testfile",
    "README",
    "Makefile",

    // Just extension
    ".md",
    ".txt",

    // Empty extension
    "test.",

    // Double extension edge cases with uppercase
    "test.Config.md",
    "file.Test.md",

    // Non-ASCII characters - accented
    "tëst.md",
    "café.md",
    "naïve.md",
    "résumé.md",

    // Non-ASCII characters - other alphabets
    "тест.md",
    "测试.md",
    "テスト.md",

    // Non-ASCII characters - symbols
    "test™.md",
    "file©.md",

    // Empty string
    "",

    // Whitespace only
    " ",

    // Extension only variations
    "..md",

    // Numbers in extension (if we expect only letters)
    "test.md5",
    "file.mp3",
    "video.mp4",

    // CamelCase variations
    "camelCase.md",
    "PascalCase.md",
    "mixedCASE.md",

    // Acronyms
    "API.md",
    "HTTP.md",
    "XMLParser.md",

    // Common problematic filenames
    "CHANGELOG.md",
    "LICENSE.md",
    "CONTRIBUTING.md",
    "TODO.md"
];
}
Loading