Skip to content

Commit

Permalink
fix(frontmatter-gen): 🐛 fix #8 and various optimisations
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastienrousseau committed Nov 19, 2024
1 parent 7a1420f commit 135d54c
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 69 deletions.
39 changes: 16 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ This will install the `fmg` command-line tool. Note: Make sure you have Rust and
use frontmatter_gen::extract;

fn main() -> Result<(), Box<dyn std::error::Error>> {
// Example content with YAML frontmatter
// Example content with properly formatted YAML frontmatter
let content = r#"---
title: My Document
date: 2025-09-09
Expand All @@ -110,39 +110,32 @@ tags:
---
# Content begins here"#;

// Extract frontmatter and content
let (frontmatter, content) = extract(content)?;

// Access frontmatter fields safely
println!("Title: {}", frontmatter.get("title")
.and_then(|v| v.as_str())
.unwrap_or("Untitled"));
println!("Content: {}", content);
// Access frontmatter fields safely with error handling
if let Some(title) = frontmatter.get("title").and_then(|v| v.as_str()) {
println!("Title: {}", title);
}

println!("Content: {}", content);
Ok(())
}
```

#### Format Conversion

```rust
// Example 2: Format Conversion - Fixed
use frontmatter_gen::{Frontmatter, Format, Value, to_format};

fn main() -> Result<(), Box<dyn std::error::Error>> {
// Create frontmatter with some data
let mut frontmatter = Frontmatter::new();
frontmatter.insert("title".to_string(), Value::String("My Document".into()));
frontmatter.insert("draft".to_string(), Value::Boolean(false));
frontmatter.insert("views".to_string(), Value::Number(42.0));
frontmatter.insert("title".to_string(), Value::String("My Document".to_string()));

// Convert to different formats
let yaml = to_format(&frontmatter, Format::Yaml)?;
let toml = to_format(&frontmatter, Format::Toml)?;
let json = to_format(&frontmatter, Format::Json)?;

println!("YAML:\n{}\n", yaml);
println!("TOML:\n{}\n", toml);
println!("JSON:\n{}\n", json);
// The actual JSON output includes quotes
println!("JSON output: {}", json); // For debugging
assert!(json.contains(r#""title":"My Document""#)); // Fixed assertion

Ok(())
}
Expand All @@ -153,10 +146,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
#### Handle Complex Nested Structures

```rust
// Example 3: Complex Nested Structures - Fixed
use frontmatter_gen::{parser, Format, Value};

fn main() -> Result<(), Box<dyn std::error::Error>> {
// Complex nested YAML frontmatter
// Remove the leading "---" as parser::parse expects raw YAML content
let yaml = r#"
title: My Document
metadata:
Expand All @@ -171,12 +165,11 @@ settings:
published: true
stats:
views: 1000
likes: 50
"#;
likes: 50"#;

let frontmatter = parser::parse(yaml, Format::Yaml)?;
let frontmatter = parser::parse(yaml.trim(), Format::Yaml)?;

// Access nested values safely using pattern matching
// Safe nested value access
if let Some(Value::Object(metadata)) = frontmatter.get("metadata") {
if let Some(Value::Object(author)) = metadata.get("author") {
if let Some(Value::String(name)) = author.get("name") {
Expand Down
30 changes: 22 additions & 8 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,20 +374,19 @@ impl Config {
})
}

/// Checks whether `code` is a language code of the form `xx-XX`.
///
/// A code is accepted only when it consists of exactly two hyphen-separated
/// parts: two ASCII lowercase letters followed by two ASCII uppercase
/// letters (for example `en-US`). A missing region, additional segments
/// such as `en-US-extra`, wrong letter case, or non-ASCII letters all make
/// the code invalid.
#[cfg(feature = "ssg")]
fn is_valid_language_code(&self, code: &str) -> bool {
    // `split_once` cuts at the first hyphen only. Any further hyphen stays
    // inside `region` and fails the uppercase check below, so codes with
    // more than two segments are rejected without collecting into a `Vec`
    // or indexing into it.
    match code.split_once('-') {
        Some((lang, region)) => {
            lang.len() == 2
                && region.len() == 2
                && lang.chars().all(|c| c.is_ascii_lowercase())
                && region.chars().all(|c| c.is_ascii_uppercase())
        }
        None => false,
    }
}


/// Checks if a port number is valid
#[cfg(feature = "ssg")]
Expand Down Expand Up @@ -964,5 +963,20 @@ mod tests {
assert_eq!(original.id(), cloned.id());
Ok(())
}

#[cfg(feature = "ssg")]
#[test]
fn test_is_valid_language_code_safe() {
    let cfg = Config::builder().site_name("Test").build().unwrap();
    // Short alias so each assertion reads as a single accepted/rejected case.
    let accepts = |code: &str| cfg.is_valid_language_code(code);

    // Well-formed `xx-XX` codes are accepted.
    assert!(accepts("en-US"));
    assert!(accepts("fr-FR"));

    // Wrong segment lengths, a missing region, or an empty string are rejected.
    assert!(!accepts("invalid-code"));
    assert!(!accepts("en"));
    assert!(!accepts(""));
    assert!(!accepts("e-US"));

    // The region must be entirely upper case.
    assert!(!accepts("en-Us"));
}

}
}
196 changes: 159 additions & 37 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,49 +12,71 @@
//!
//! ## Overview
//!
//! Frontmatter is metadata prepended to content files, commonly used in static site
//! generators and content management systems. This library provides:
//! This library provides robust handling of frontmatter with the following key features:
//!
//! - **Zero-copy parsing** for optimal performance
//! - **Format auto-detection** between YAML, TOML, and JSON
//! - **Memory safety** with no unsafe code
//! - **Comprehensive validation** of all inputs
//! - **Rich error handling** with detailed diagnostics
//! - **Async support** for non-blocking operations
//! - **Zero-copy parsing** for optimal memory efficiency
//! - **Type-safe operations** with comprehensive error handling
//! - **Multiple format support** (YAML, TOML, JSON)
//! - **Secure processing** with input validation and size limits
//! - **Async support** with the `ssg` feature flag
//!
//! ## Security Features
//!
//! - Input validation to prevent malicious content
//! - Size limits to prevent denial of service attacks
//! - Safe string handling to prevent memory corruption
//! - Secure path handling for file operations
//!
//! ## Quick Start
//!
//! ```rust
//! use frontmatter_gen::{extract, Format, Result};
//! use frontmatter_gen::{extract, Format, Frontmatter, Result};
//!
//! fn main() -> Result<()> {
//! let content = r#"---
//! title: My Post
//! let content = r#"---
//! title: Test Post
//! date: 2025-09-09
//! draft: false
//! ---
//! # Post content here
//! "#;
//! Content here"#;
//!
//! let result = extract(content);
//! assert!(result.is_ok());
//! let (frontmatter, content) = result.unwrap();
//! assert_eq!(
//! frontmatter.get("title").and_then(|v| v.as_str()),
//! Some("Test Post")
//! );
//! assert_eq!(content.trim(), "Content here");
//! # Ok::<(), frontmatter_gen::FrontmatterError>(())
//! ```
//!
//! ## Feature Flags
//!
//! - `default`: Core frontmatter functionality
//! - `cli`: Command-line interface support
//! - `ssg`: Static Site Generator functionality (includes CLI)
//!
//! ## Error Handling
//!
//! All operations return a `Result` type with detailed error information:
//!
//! let (frontmatter, content) = extract(content)?;
//! println!("Title: {}", frontmatter.get("title")
//! .and_then(|v| v.as_str())
//! .unwrap_or("Untitled"));
//! ```rust
//! use frontmatter_gen::{extract, FrontmatterError};
//!
//! fn process_content(content: &str) -> Result<(), FrontmatterError> {
//! let (frontmatter, _) = extract(content)?;
//!
//! // Validate required fields
//! if !frontmatter.contains_key("title") {
//! return Err(FrontmatterError::ValidationError(
//! "Missing required field: title".to_string()
//! ));
//! }
//!
//! Ok(())
//! }
//! ```
/// Prelude module for convenient imports.
///
/// This module provides the most commonly used types and traits.
/// Import all contents with `use frontmatter_gen::prelude::*`.
pub mod prelude {
pub use crate::{
extract, to_format, Config, Format, Frontmatter,
FrontmatterError, Result, Value,
};
}
use std::num::NonZeroUsize;

// Re-export core types and traits
pub use crate::{
Expand All @@ -74,22 +96,115 @@ pub mod parser;
pub mod types;
pub mod utils;

/// Maximum size allowed for frontmatter content (1 MiB, i.e. 1024 * 1024 bytes).
pub const MAX_FRONTMATTER_SIZE: NonZeroUsize =
    match NonZeroUsize::new(1024 * 1024) {
        Some(size) => size,
        // Evaluated at compile time: a zero value fails the build instead of
        // relying on an unchecked `unsafe` construction.
        None => panic!("MAX_FRONTMATTER_SIZE must be non-zero"),
    };

/// Maximum allowed nesting depth for structured data
pub const MAX_NESTING_DEPTH: NonZeroUsize = match NonZeroUsize::new(32) {
    Some(depth) => depth,
    // Evaluated at compile time, same reasoning as for the size limit.
    None => panic!("MAX_NESTING_DEPTH must be non-zero"),
};

/// A specialized `Result` type for frontmatter operations.
///
/// This alias fixes the error type to `FrontmatterError`, so
/// `Result<T>` is shorthand for `std::result::Result<T, FrontmatterError>`.
/// It provides a consistent error type throughout the crate and simplifies
/// error handling for library users.
pub type Result<T> = std::result::Result<T, FrontmatterError>;

/// Prelude module for convenient imports.
///
/// This module provides the most commonly used types and traits.
/// Import all contents with `use frontmatter_gen::prelude::*`.
pub mod prelude {
pub use crate::{
extract, to_format, Config, Format, Frontmatter,
FrontmatterError, Result, Value,
};
}

/// Limits and switches applied to parsing operations.
///
/// Groups the size limit, the nesting-depth limit and the validation toggle
/// so that parsing behaviour and security limits can be tuned in one place.
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
    /// Upper bound on the size of the content
    pub max_size: NonZeroUsize,
    /// Upper bound on the nesting depth
    pub max_depth: NonZeroUsize,
    /// Whether the content structure should be validated
    pub validate: bool,
}

impl Default for ParseOptions {
    /// Returns options that use the crate-wide size and depth limits with
    /// validation switched on.
    fn default() -> Self {
        ParseOptions {
            validate: true,
            max_depth: MAX_NESTING_DEPTH,
            max_size: MAX_FRONTMATTER_SIZE,
        }
    }
}

/// Validates input content against security constraints.
///
/// The checks run in order and the first failure is returned:
///
/// 1. the byte length of `content` must not exceed `options.max_size`;
/// 2. `content` must not contain a null byte;
/// 3. `content` must not contain the sequences `../` or `..\`.
///
/// # Security
///
/// Rejecting oversized input up front bounds the work done on untrusted
/// content, and the pattern checks reject null bytes and path traversal
/// sequences before anything is parsed.
///
/// # Errors
///
/// Returns:
/// - `FrontmatterError::ContentTooLarge` if `content` is longer than
///   `options.max_size` bytes;
/// - `FrontmatterError::ValidationError` if `content` contains a null byte
///   or a path traversal pattern.
fn validate_input(content: &str, options: &ParseOptions) -> Result<()> {
    // NOTE(review): `options.validate` and `options.max_depth` are not
    // consulted here; every check below runs unconditionally. Confirm
    // whether the pattern checks should be gated on `options.validate`.

    // Check content size
    if content.len() > options.max_size.get() {
        return Err(FrontmatterError::ContentTooLarge {
            size: content.len(),
            max: options.max_size.get(),
        });
    }

    // Validate character content
    if content.contains('\0') {
        return Err(FrontmatterError::ValidationError(
            "Content contains null bytes".to_string(),
        ));
    }

    // Check for other malicious patterns
    if content.contains("../") || content.contains("..\\") {
        return Err(FrontmatterError::ValidationError(
            "Content contains path traversal patterns".to_string(),
        ));
    }

    Ok(())
}

/// Extracts and parses frontmatter from content with format auto-detection.
///
/// This function provides a zero-copy extraction of frontmatter, automatically
/// detecting the format (YAML, TOML, or JSON) and parsing it into a structured
/// representation.
/// This function provides zero-copy extraction of frontmatter where possible,
/// automatically detecting the format (YAML, TOML, or JSON) and parsing it
/// into a structured representation.
///
/// # Security
///
/// This function includes several security measures:
/// - Input validation and size limits
/// - Safe string handling
/// - Protection against malicious content
///
/// # Performance
///
/// This function performs a single pass over the input with O(n) complexity
/// and avoids unnecessary allocations where possible.
/// Optimized for performance with:
/// - Zero-copy operations where possible
/// - Single-pass parsing
/// - Minimal allocations
/// - Pre-allocated buffers
///
/// # Examples
///
Expand All @@ -111,15 +226,19 @@ pub type Result<T> = std::result::Result<T, FrontmatterError>;
/// # Errors
///
/// Returns `FrontmatterError` if:
/// - Content exceeds size limits
/// - Content is malformed
/// - Frontmatter format is invalid
/// - Parsing fails
#[inline]
pub fn extract(content: &str) -> Result<(Frontmatter, &str)> {
    // Security checks run first, using the default limits.
    // NOTE(review): validation is applied to the whole input, not only to
    // the frontmatter section, so the size limit and pattern checks also
    // cover the body text. Confirm that this is intended.
    validate_input(content, &ParseOptions::default())?;

    // Split the input into the raw frontmatter and the remaining body, then
    // detect the frontmatter's format and parse it.
    let (raw, body) = extract_raw_frontmatter(content)?;
    let parsed = parse(raw, detect_format(raw)?)?;

    Ok((parsed, body))
}

Expand All @@ -130,9 +249,12 @@ pub fn extract(content: &str) -> Result<(Frontmatter, &str)> {
/// * `frontmatter` - The frontmatter to convert
/// * `format` - Target format for conversion
///
/// # Returns
/// # Security
///
/// Returns the formatted string representation or an error.
/// This function includes validation of:
/// - Input size limits
/// - Format compatibility
/// - Output safety
///
/// # Examples
///
Expand Down
Loading

0 comments on commit 135d54c

Please sign in to comment.