diff --git a/CHANGELOG b/CHANGELOG index 2987fe3..2b393b2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +# Unreleased +- [add][minor] Allow re-use of parsed templates with `Template`, `TemplateBuf`, `ByteTemplate` and `ByteTemplateBuf`. + # Version 0.3.1 - 2024-06-09 - [fix][minor] Fix recursive substitution in braced default values. diff --git a/README.md b/README.md index 05fa532..9b6ed4d 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,16 @@ Shell-like variable substitution for strings and byte strings. Variable names can consist of alphanumeric characters and underscores. They are allowed to start with numbers. +If you want to quickly perform substitution on a string, use [`substitute()`] or [`substitute_bytes()`]. + +It is also possible to use one of the template types. +The templates parse the source string or bytes once, and can be expanded as many times as you want. +There are four different template types to choose from: +* [`Template`]: borrows the source string. +* [`TemplateBuf`]: owns the source string. +* [`ByteTemplate`]: borrows the source bytes. +* [`ByteTemplateBuf`]: owns the source bytes. + ## Examples The [`substitute()`][substitute] function can be used to perform substitution on a `&str`. @@ -26,7 +36,7 @@ variables.insert("name", "world"); assert_eq!(subst::substitute("Hello $name!", &variables)?, "Hello world!"); ``` -The variables can also be taken directly from the environment with the [`Env`][Env] map. +The variables can also be taken directly from the environment with the [`Env`] map. 
```rust assert_eq!( @@ -43,8 +53,24 @@ variables.insert("name", b"world"); assert_eq!(subst::substitute_bytes(b"Hello $name!", &variables)?, b"Hello world!"); ``` -[substitute]: https://docs.rs/subst/latest/subst/fn.substitute.html -[substitute_bytes]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html -[Env]: https://docs.rs/subst/latest/subst/struct.Env.html +You can also parse a template once and expand it multiple times: + +```rust +let mut variables = HashMap::new(); +let template = subst::Template::from_str("Welcome to our hair salon, $name!")?; +for name in ["Scrappy", "Coco"] { + variables.insert("name", name); + let message = template.expand(&variables)?; + println!("{}", message); +} +``` + +[`substitute()`]: https://docs.rs/subst/latest/subst/fn.substitute.html +[`substitute_bytes()`]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html +[`Template`]: https://docs.rs/subst/latest/subst/struct.Template.html +[`TemplateBuf`]: https://docs.rs/subst/latest/subst/struct.TemplateBuf.html +[`ByteTemplate`]: https://docs.rs/subst/latest/subst/struct.ByteTemplate.html +[`ByteTemplateBuf`]: https://docs.rs/subst/latest/subst/struct.ByteTemplateBuf.html +[`Env`]: https://docs.rs/subst/latest/subst/struct.Env.html [std::collections::HashMap]: https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html [std::collections::BTreeMap]: https://doc.rust-lang.org/stable/std/collections/struct.BTreeMap.html diff --git a/README.tpl b/README.tpl index 56f6054..4778127 100644 --- a/README.tpl +++ b/README.tpl @@ -2,8 +2,12 @@ {{readme}} -[substitute]: https://docs.rs/subst/latest/subst/fn.substitute.html -[substitute_bytes]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html -[Env]: https://docs.rs/subst/latest/subst/struct.Env.html +[`substitute()`]: https://docs.rs/subst/latest/subst/fn.substitute.html +[`substitute_bytes()`]: https://docs.rs/subst/latest/subst/fn.substitute_bytes.html +[`Template`]: 
https://docs.rs/subst/latest/subst/struct.Template.html +[`TemplateBuf`]: https://docs.rs/subst/latest/subst/struct.TemplateBuf.html +[`ByteTemplate`]: https://docs.rs/subst/latest/subst/struct.ByteTemplate.html +[`ByteTemplateBuf`]: https://docs.rs/subst/latest/subst/struct.ByteTemplateBuf.html +[`Env`]: https://docs.rs/subst/latest/subst/struct.Env.html [std::collections::HashMap]: https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html [std::collections::BTreeMap]: https://doc.rust-lang.org/stable/std/collections/struct.BTreeMap.html diff --git a/src/error.rs b/src/error.rs index 368b92b..187be4f 100644 --- a/src/error.rs +++ b/src/error.rs @@ -20,6 +20,31 @@ pub enum Error { NoSuchVariable(NoSuchVariable), } +/// An error that can occur while parsing a template. +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(Eq, PartialEq))] +pub enum ParseError { + /// The input string contains an invalid escape sequence. + InvalidEscapeSequence(InvalidEscapeSequence), + + /// The input string contains a variable placeholder without a variable name (`"${}"`). + MissingVariableName(MissingVariableName), + + /// The input string contains an unexpected character. + UnexpectedCharacter(UnexpectedCharacter), + + /// The input string contains an unclosed variable placeholder. + MissingClosingBrace(MissingClosingBrace), +} + +/// An error that can occur while expanding a template. +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(Eq, PartialEq))] +pub enum ExpandError { + /// The input string contains a placeholder for a variable that is not in the variable map. 
+ NoSuchVariable(NoSuchVariable), +} + impl From for Error { #[inline] fn from(other: InvalidEscapeSequence) -> Self { @@ -55,6 +80,62 @@ impl From for Error { } } +impl From for Error { + #[inline] + fn from(other: ParseError) -> Self { + match other { + ParseError::InvalidEscapeSequence(e) => Self::InvalidEscapeSequence(e), + ParseError::MissingVariableName(e) => Self::MissingVariableName(e), + ParseError::UnexpectedCharacter(e) => Self::UnexpectedCharacter(e), + ParseError::MissingClosingBrace(e) => Self::MissingClosingBrace(e), + } + } +} + +impl From for Error { + #[inline] + fn from(other: ExpandError) -> Self { + match other { + ExpandError::NoSuchVariable(e) => Self::NoSuchVariable(e), + } + } +} + +impl From for ParseError { + #[inline] + fn from(other: InvalidEscapeSequence) -> Self { + Self::InvalidEscapeSequence(other) + } +} + +impl From for ParseError { + #[inline] + fn from(other: MissingVariableName) -> Self { + Self::MissingVariableName(other) + } +} + +impl From for ParseError { + #[inline] + fn from(other: UnexpectedCharacter) -> Self { + Self::UnexpectedCharacter(other) + } +} + +impl From for ParseError { + #[inline] + fn from(other: MissingClosingBrace) -> Self { + Self::MissingClosingBrace(other) + } +} + +impl From for ExpandError { + #[inline] + fn from(other: NoSuchVariable) -> Self { + Self::NoSuchVariable(other) + } +} + impl std::error::Error for Error {} impl std::fmt::Display for Error { @@ -70,6 +151,31 @@ impl std::fmt::Display for Error { } } +impl std::error::Error for ParseError {} + +impl std::fmt::Display for ParseError { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::InvalidEscapeSequence(e) => e.fmt(f), + Self::MissingVariableName(e) => e.fmt(f), + Self::UnexpectedCharacter(e) => e.fmt(f), + Self::MissingClosingBrace(e) => e.fmt(f), + } + } +} + +impl std::error::Error for ExpandError {} + +impl std::fmt::Display for ExpandError { + #[inline] + fn fmt(&self, f: &mut 
std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::NoSuchVariable(e) => e.fmt(f), + } + } +} + /// A character or byte from the input. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum CharOrByte { diff --git a/src/lib.rs b/src/lib.rs index 4cd7317..ec8c150 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,16 @@ //! Variable names can consist of alphanumeric characters and underscores. //! They are allowed to start with numbers. //! +//! If you want to quickly perform substitution on a string, use [`substitute()`] or [`substitute_bytes()`]. +//! +//! It is also possible to use one of the template types. +//! The templates parse the source string or bytes once, and can be expanded as many times as you want. +//! There are four different template types to choose from: +//! * [`Template`]: borrows the source string. +//! * [`TemplateBuf`]: owns the source string. +//! * [`ByteTemplate`]: borrows the source bytes. +//! * [`ByteTemplateBuf`]: owns the source bytes. +//! //! # Examples //! //! The [`substitute()`][substitute] function can be used to perform substitution on a `&str`. @@ -28,7 +38,7 @@ //! # } //! ``` //! -//! The variables can also be taken directly from the environment with the [`Env`][Env] map. +//! The variables can also be taken directly from the environment with the [`Env`] map. //! //! ``` //! # fn main() -> Result<(), subst::Error> { @@ -52,6 +62,23 @@ //! # Ok(()) //! # } //! ``` +//! +//! You can also parse a template once and expand it multiple times: +//! +//! ``` +//! # fn main() -> Result<(), subst::Error> { +//! # use std::collections::HashMap; +//! let mut variables = HashMap::new(); +//! let template = subst::Template::from_str("Welcome to our hair salon, $name!")?; +//! for name in ["Scrappy", "Coco"] { +//! variables.insert("name", name); +//! let message = template.expand(&variables)?; +//! println!("{}", message); +//! # assert_eq!(message, format!("Welcome to our hair salon, {name}!")); +//! } +//! # Ok(()) +//! 
# } +//! ``` #![warn(missing_docs, missing_debug_implementations)] pub mod error; @@ -60,6 +87,9 @@ pub use error::Error; mod map; pub use map::*; +mod template; +pub use template::*; + #[cfg(feature = "yaml")] pub mod yaml; @@ -78,12 +108,8 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef, { - let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source.as_bytes(), 0..source.len(), variables, &|x| { - x.as_ref().as_bytes() - })?; - // SAFETY: Both source and all variable values are valid UTF-8, so substitation result is also valid UTF-8. - unsafe { Ok(String::from_utf8_unchecked(output)) } + let output = template::Template::from_str(source)?.expand(variables)?; + Ok(output) } /// Substitute variables in a byte string. @@ -102,273 +128,10 @@ where M: VariableMap<'a> + ?Sized, M::Value: AsRef<[u8]>, { - let mut output = Vec::with_capacity(source.len() + source.len() / 10); - substitute_impl(&mut output, source, 0..source.len(), variables, &|x| x.as_ref())?; + let output = template::ByteTemplate::from_slice(source)?.expand(variables)?; Ok(output) } -/// Substitute variables in a byte string. -/// -/// This is the real implementation used by both [`substitute`] and [`substitute_bytes`]. -/// The function accepts any type that implements [`VariableMap`], and a function to convert the value from the map into bytes. 
-fn substitute_impl<'a, M, F>( - output: &mut Vec, - source: &[u8], - range: std::ops::Range, - variables: &'a M, - to_bytes: &F, -) -> Result<(), Error> -where - M: VariableMap<'a> + ?Sized, - F: Fn(&M::Value) -> &[u8], -{ - let mut finger = range.start; - while finger < range.end { - let next = match memchr::memchr2(b'$', b'\\', &source[finger..range.end]) { - Some(x) => finger + x, - None => break, - }; - - output.extend_from_slice(&source[finger..next]); - if source[next] == b'\\' { - output.push(unescape_one(source, next)?); - finger = next + 2; - } else { - let variable = parse_variable(source, next)?; - let value = variables.get(variable.name); - match (&value, &variable.default) { - (None, None) => { - return Err(error::NoSuchVariable { - position: variable.name_start, - name: variable.name.to_owned(), - } - .into()); - }, - (Some(value), _default) => { - output.extend_from_slice(to_bytes(value)); - }, - (None, Some(default)) => { - substitute_impl(output, source, default.clone(), variables, to_bytes)?; - }, - }; - finger = variable.end_position; - } - } - - output.extend_from_slice(&source[finger..range.end]); - Ok(()) -} - -/// A parsed variable. -#[derive(Debug)] -struct Variable<'a> { - /// The name of the variable. - name: &'a str, - - /// The start position of the name in the source. - name_start: usize, - - /// The default value of the variable. - default: Option>, - - /// The end position of the entire variable in the source. - end_position: usize, -} - -/// Parse a variable from source at the given position. -/// -/// The finger must be the position of the dollar sign in the source. -fn parse_variable(source: &[u8], finger: usize) -> Result { - if finger == source.len() { - return Err(error::MissingVariableName { - position: finger, - len: 1, - } - .into()); - } - if source[finger + 1] == b'{' { - parse_braced_variable(source, finger) - } else { - let name_end = match source[finger + 1..] 
- .iter() - .position(|&c| !c.is_ascii_alphanumeric() && c != b'_') - { - Some(0) => { - return Err(error::MissingVariableName { - position: finger, - len: 1, - } - .into()); - }, - Some(x) => finger + 1 + x, - None => source.len(), - }; - Ok(Variable { - name: std::str::from_utf8(&source[finger + 1..name_end]).unwrap(), - name_start: finger + 1, - default: None, - end_position: name_end, - }) - } -} - -/// Parse a braced variable in the form of "${name[:default]} from source at the given position. -/// -/// The finger must be the position of the dollar sign in the source. -fn parse_braced_variable(source: &[u8], finger: usize) -> Result { - let name_start = finger + 2; - if name_start >= source.len() { - return Err(error::MissingVariableName { - position: finger, - len: 2, - } - .into()); - } - - // Get the first sequence of alphanumeric characters and underscores for the variable name. - let name_end = match source[name_start..] - .iter() - .position(|&c| !c.is_ascii_alphanumeric() && c != b'_') - { - Some(0) => { - return Err(error::MissingVariableName { - position: finger, - len: 2, - } - .into()); - }, - Some(x) => name_start + x, - None => source.len(), - }; - - // If the name extends to the end, we're missing a closing brace. - if name_end == source.len() { - return Err(error::MissingClosingBrace { position: finger + 1 }.into()); - } - - // If there is a closing brace after the name, there is no default value and we're done. - if source[name_end] == b'}' { - return Ok(Variable { - name: std::str::from_utf8(&source[name_start..name_end]).unwrap(), - name_start, - default: None, - end_position: name_end + 1, - }); - - // If there is something other than a closing brace or colon after the name, it's an error. 
- } else if source[name_end] != b':' { - return Err(error::UnexpectedCharacter { - position: name_end, - character: get_maybe_char_at(source, name_end), - expected: error::ExpectedCharacter { - message: "a closing brace ('}') or colon (':')", - }, - } - .into()); - } - - // If there is no un-escaped closing brace pair, it's missing. - let end = - finger + find_closing_brace(&source[finger..]).ok_or(error::MissingClosingBrace { position: finger + 1 })?; - - Ok(Variable { - name: std::str::from_utf8(&source[name_start..name_end]).unwrap(), - name_start, - default: Some(name_end + 1..end), - end_position: end + 1, - }) -} - -/// Get the prefix from the input that is valid UTF-8 as [`str`]. -/// -/// If the whole input is valid UTF-8, the whole input is returned. -/// If the first byte is already invalid UTF-8, an empty string is returned. -fn valid_utf8_prefix(input: &[u8]) -> &str { - // The unwrap can not panic: we used `e.valid_up_to()` to get the valid UTF-8 slice. - std::str::from_utf8(input) - .or_else(|e| std::str::from_utf8(&input[..e.valid_up_to()])) - .unwrap() -} - -/// Get the character at a given index. -/// -/// If the data at the given index contains a valid UTF-8 sequence, -/// returns a [`error::CharOrByte::Char`]. -/// Otherwise, returns a [`error::CharOrByte::Byte`]. -fn get_maybe_char_at(data: &[u8], index: usize) -> error::CharOrByte { - let head = &data[index..]; - let head = &head[..head.len().min(4)]; - assert!( - !head.is_empty(), - "index out of bounds: data.len() is {} but index is {}", - data.len(), - index, - ); - - let head = valid_utf8_prefix(head); - if let Some(c) = head.chars().next() { - error::CharOrByte::Char(c) - } else { - error::CharOrByte::Byte(data[index]) - } -} - -/// Find the closing brace of recursive substitutions. 
-fn find_closing_brace(haystack: &[u8]) -> Option { - let mut finger = 0; - // We need to count the first opening brace - let mut nested = -1; - while finger < haystack.len() { - let candidate = memchr::memchr3(b'\\', b'{', b'}', &haystack[finger..])?; - if haystack[finger + candidate] == b'\\' { - if candidate == haystack.len() - 1 { - return None; - } - finger += candidate + 2; - } else if haystack[finger + candidate] == b'{' { - if candidate == haystack.len() - 1 { - return None; - } - nested += 1; - finger += candidate + 1; - } else if nested != 0 { - nested -= 1; - finger += candidate + 1; - } else { - return Some(finger + candidate); - } - } - None -} - -/// Unescape a single escape sequence in source at the given position. -/// -/// The `position` must point to the backslash character in the source text. -/// -/// Only valid escape sequences ('\$' '\{' '\}' and '\:') are accepted. -/// Invalid escape sequences cause an error to be returned. -fn unescape_one(source: &[u8], position: usize) -> Result { - if position == source.len() - 1 { - return Err(error::InvalidEscapeSequence { - position, - character: None, - } - .into()); - } - match source[position + 1] { - b'\\' => Ok(b'\\'), - b'$' => Ok(b'$'), - b'{' => Ok(b'{'), - b'}' => Ok(b'}'), - b':' => Ok(b':'), - _ => Err(error::InvalidEscapeSequence { - position, - character: Some(get_maybe_char_at(source, position + 1)), - } - .into()), - } -} - #[cfg(test)] #[rustfmt::skip] mod test { @@ -376,40 +139,6 @@ mod test { use assert2::{assert, check, let_assert}; use std::collections::BTreeMap; - #[test] - fn test_get_maybe_char_at() { - use error::CharOrByte::{Byte, Char}; - - assert!(get_maybe_char_at(b"hello", 0) == Char('h')); - assert!(get_maybe_char_at(b"he", 0) == Char('h')); - assert!(get_maybe_char_at(b"hello", 1) == Char('e')); - assert!(get_maybe_char_at(b"he", 1) == Char('e')); - assert!(get_maybe_char_at(b"hello\x80", 1) == Char('e')); - assert!(get_maybe_char_at(b"he\x80llo\x80", 1) == Char('e')); - 
- assert!(get_maybe_char_at(b"h\x79", 1) == Char('\x79')); - assert!(get_maybe_char_at(b"h\x80llo", 1) == Byte(0x80)); - - // The UTF-8 sequence for '❤' is [0xE2, 0x9D, 0xA4]". - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 0) == Char('h')); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 1) == Char('❤')); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 2) == Byte(0x9d)); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 3) == Byte(0xA4)); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 4) == Char('l')); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 5) == Char('l')); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 6) == Char('❤')); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 7) == Byte(0x9d)); - assert!(get_maybe_char_at("h❤ll❤".as_bytes(), 8) == Byte(0xA4)); - } - - #[test] - fn test_find_closing_brace() { - check!(find_closing_brace(b"${foo}") == Some(5)); - check!(find_closing_brace(b"{\\{}foo") == Some(3)); - check!(find_closing_brace(b"{{}}foo $bar") == Some(3)); - check!(find_closing_brace(b"foo{\\}}bar") == Some(6)); - } - #[test] fn test_substitute() { let mut map: BTreeMap = BTreeMap::new(); diff --git a/src/template/mod.rs b/src/template/mod.rs new file mode 100644 index 0000000..e7cf74c --- /dev/null +++ b/src/template/mod.rs @@ -0,0 +1,405 @@ +use crate::error::{ExpandError, ParseError}; +use crate::VariableMap; + +mod raw; + +/// A parsed string template that borrows the source string. +/// +/// You can parse the template once and call [`Self::expand()`] multiple times. +/// This is generally more efficient than calling [`substitute()`][crate::substitute] multiple times on the same string. +/// +/// This template borrows the source string. +/// You can use [`TemplateBuf`] if you need a template that owns the source string. +/// +/// If you have a byte slice or vector instead of a string, +/// you can use [`ByteTemplate`] or [`ByteTemplateBuf`]. 
+#[derive(Clone)] +pub struct Template<'a> { + source: &'a str, + raw: raw::Template, +} + +impl std::fmt::Debug for Template<'_> { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Template").field(&self.source).finish() + } +} + +impl<'a> Template<'a> { + /// Parse a template from a string slice. + /// + /// The source can contain variables to be substituted later, + /// when you call [`Self::expand()`]. + /// + /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. + /// A variable name can only consist of ASCII letters, digits and underscores. + /// They are allowed to start with numbers. + /// + /// You can escape dollar signs, backslashes, colons and braces with a backslash. + #[inline] + #[allow(clippy::should_implement_trait)] + pub fn from_str(source: &'a str) -> Result { + Ok(Self { + source, + raw: raw::Template::parse(source.as_bytes(), 0)?, + }) + } + + /// Get the original source string. + #[inline] + pub fn source(&self) -> &str { + self.source + } + + /// Expand the template. + /// + /// This will substitute all variables in the template with the values from the given map. + /// + /// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] or [`Env`][crate::Env] as the `variables` parameter. + /// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef`]. + pub fn expand<'b, M>(&self, variables: &'b M) -> Result + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef, + { + let mut output = Vec::with_capacity(self.source.len() + self.source.len() / 10); + self.raw.expand(&mut output, self.source.as_bytes(), variables, &|x| { + x.as_ref().as_bytes() + })?; + // SAFETY: Both source and all variable values are valid UTF-8, so substitution result is also valid UTF-8. + unsafe { Ok(String::from_utf8_unchecked(output)) } + } + + /// Transmute the lifetime of the source data. 
+ /// + /// # Safety: + /// You must ensure that the template and the source data are not used after the source data becomes invalid. + unsafe fn transmute_lifetime<'b>(self) -> Template<'b> { + std::mem::transmute(self) + } +} + +/// A parsed string template that owns the source string. +/// +/// You can parse the template once and call [`Self::expand()`] multiple times. +/// This is generally more efficient than calling [`substitute()`][crate::substitute] multiple times on the same string. +/// +/// This template owns the source string. +/// If you do not need ownership, you can also use [`Template`] to borrow it instead. +/// Depending on your application, that could prevent creating an unnecessary copy of the source data. +/// +/// If you have a byte slice or vector instead of a string, +/// you can use [`ByteTemplate`] or [`ByteTemplateBuf`]. +#[derive(Clone)] +pub struct TemplateBuf { + source: String, + template: Template<'static>, +} + +impl std::fmt::Debug for TemplateBuf { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("TemplateBuf").field(&self.source).finish() + } +} + +impl TemplateBuf { + /// Parse a template from a string. + /// + /// This takes ownership of the string. + /// + /// The source can contain variables to be substituted later, + /// when you call [`Self::expand()`]. + /// + /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. + /// A variable name can only consist of ASCII letters, digits and underscores. + /// They are allowed to start with numbers. + /// + /// You can escape dollar signs, backslashes, colons and braces with a backslash. + #[inline] + pub fn from_string(source: String) -> Result { + let template = Template::from_str(&source)?; + + // SAFETY: + // The str slice given to `template` must remain valid. + // Since `String` keeps data on the heap, it remains valid when the `source` is moved. + // We MUST ensure we do not modify, drop or overwrite `source`. 
+ let template = unsafe { template.transmute_lifetime() }; + Ok(Self { source, template }) + } + + /// Consume the template to get the original source string. + #[inline] + pub fn into_source(self) -> String { + self.source + } + + /// Borrow the template. + #[inline] + #[allow(clippy::needless_lifetimes)] + pub fn as_template<'a>(&'a self) -> &'a Template<'a> { + &self.template + } + + /// Expand the template. + /// + /// This will substitute all variables in the template with the values from the given map. + /// + /// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] or [`Env`][crate::Env] as the `variables` parameter. + /// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef`]. + pub fn expand<'b, M>(&self, variables: &'b M) -> Result + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef, + { + self.template.expand(variables) + } +} + +impl<'a> From<&'a TemplateBuf> for &'a Template<'a> { + #[inline] + fn from(other: &'a TemplateBuf) -> Self { + other.as_template() + } +} + +impl<'a> From<&'a TemplateBuf> for Template<'a> { + #[inline] + fn from(other: &'a TemplateBuf) -> Self { + other.as_template().clone() + } +} + +impl From<&Template<'_>> for TemplateBuf { + #[inline] + fn from(other: &Template<'_>) -> Self { + other.clone().into() + } +} + +impl From> for TemplateBuf { + #[inline] + fn from(other: Template<'_>) -> Self { + let source: String = other.source.into(); + + let template = Template { + source: source.as_str(), + raw: other.raw, + }; + + // SAFETY: + // The str slice given to `template` must remain valid. + // Since `String` keeps data on the heap, it remains valid when the `source` is moved. + // We MUST ensure we do not modify, drop or overwrite `source`. + let template = unsafe { template.transmute_lifetime() }; + + Self { source, template } + } +} + +/// A parsed byte template that borrows the source slice. 
+/// +/// You can parse the template once and call [`Self::expand()`] multiple times. +/// This is generally more efficient than calling [`substitute_bytes()`][crate::substitute_bytes] multiple times on the same byte slice. +/// +/// This template borrows the source data. +/// You can use [`ByteTemplateBuf`] if you need a template that owns the source data. +/// +/// If you have a string instead of a byte slice, +/// you can use [`Template`] or [`TemplateBuf`]. +#[derive(Clone)] +pub struct ByteTemplate<'a> { + source: &'a [u8], + raw: raw::Template, +} + +impl std::fmt::Debug for ByteTemplate<'_> { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ByteTemplate") + .field(&DebugByteString(self.source)) + .finish() + } +} + +impl<'a> ByteTemplate<'a> { + /// Parse a template from a byte slice. + /// + /// The source can contain variables to be substituted later, + /// when you call [`Self::expand()`]. + /// + /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. + /// A variable name can only consist of ASCII letters, digits and underscores. + /// They are allowed to start with numbers. + /// + /// You can escape dollar signs, backslashes, colons and braces with a backslash. + #[inline] + pub fn from_slice(source: &'a [u8]) -> Result { + Ok(Self { + source, + raw: raw::Template::parse(source, 0)?, + }) + } + + /// Get the original source slice. + #[inline] + pub fn source(&self) -> &[u8] { + self.source + } + + /// Expand the template. + /// + /// This will substitute all variables in the template with the values from the given map. + /// + /// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] or [`Env`][crate::Env] as the `variables` parameter. + /// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef<[u8]>`]. 
+ pub fn expand<'b, M>(&self, variables: &'b M) -> Result, ExpandError> + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef<[u8]>, + { + let mut output = Vec::with_capacity(self.source.len() + self.source.len() / 10); + self.raw.expand(&mut output, self.source, variables, &|x| x.as_ref())?; + Ok(output) + } + + /// Transmute the lifetime of the source data. + /// + /// # Safety: + /// You must ensure that the template and the source data are not used after the source data becomes invalid. + unsafe fn transmute_lifetime<'b>(self) -> ByteTemplate<'b> { + std::mem::transmute(self) + } +} + +/// A parsed byte template that owns the source vector. +/// +/// You can parse the template once and call [`Self::expand()`] multiple times. +/// This is generally more efficient than calling [`substitute_bytes()`][crate::substitute_bytes] multiple times on the same byte slice. +/// +/// This template takes ownership of the source data. +/// If you do not need ownership, you can also use [`ByteTemplate`] to borrow it instead. +/// Depending on your application, that could prevent creating an unnecessary copy of the source data. +/// +/// If you have a string instead of a byte slice, +/// you can use [`Template`] or [`TemplateBuf`]. +#[derive(Clone)] +pub struct ByteTemplateBuf { + source: Vec, + template: ByteTemplate<'static>, +} + +impl std::fmt::Debug for ByteTemplateBuf { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ByteTemplateBuf") + .field(&DebugByteString(&self.source)) + .finish() + } +} + +impl ByteTemplateBuf { + /// Parse a template from a vector of bytes. + /// + /// The source can contain variables to be substituted later, + /// when you call [`Self::expand()`]. + /// + /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. + /// A variable name can only consist of ASCII letters, digits and underscores. + /// They are allowed to start with numbers. 
+ /// + /// You can escape dollar signs, backslashes, colons and braces with a backslash. + #[inline] + pub fn from_vec(source: Vec) -> Result { + let template = ByteTemplate::from_slice(&source)?; + + // SAFETY: + // The slice given to `template` must remain valid. + // Since `Vec` keeps data on the heap, it remains valid when the `source` is moved. + // We MUST ensure we do not modify, drop or overwrite `source`. + let template = unsafe { std::mem::transmute::, ByteTemplate<'static>>(template) }; + Ok(Self { source, template }) + } + + /// Consume the template to get the original source vector. + #[inline] + pub fn into_source(self) -> Vec { + self.source + } + + /// Borrow the template. + #[inline] + #[allow(clippy::needless_lifetimes)] + pub fn as_template<'a>(&'a self) -> &'a ByteTemplate<'static> { + &self.template + } + + /// Expand the template. + /// + /// This will substitute all variables in the template with the values from the given map. + /// + /// You can pass either a [`HashMap`][std::collections::HashMap], [`BTreeMap`][std::collections::BTreeMap] or [`Env`][crate::Env] as the `variables` parameter. + /// The maps must have [`&str`] or [`String`] keys, and the values must be [`AsRef<[u8]>`]. 
+ pub fn expand<'b, M>(&self, variables: &'b M) -> Result, ExpandError> + where + M: VariableMap<'b> + ?Sized, + M::Value: AsRef<[u8]>, + { + self.template.expand(variables) + } +} + +impl<'a> From<&'a ByteTemplateBuf> for &'a ByteTemplate<'a> { + #[inline] + fn from(other: &'a ByteTemplateBuf) -> Self { + other.as_template() + } +} + +impl<'a> From<&'a ByteTemplateBuf> for ByteTemplate<'a> { + #[inline] + fn from(other: &'a ByteTemplateBuf) -> Self { + other.as_template().clone() + } +} + +impl From<&ByteTemplate<'_>> for ByteTemplateBuf { + #[inline] + fn from(other: &ByteTemplate<'_>) -> Self { + other.clone().into() + } +} + +impl From> for ByteTemplateBuf { + #[inline] + fn from(other: ByteTemplate<'_>) -> Self { + let source: Vec = other.source.into(); + + let template = ByteTemplate { + source: source.as_slice(), + raw: other.raw, + }; + + // SAFETY: + // The slice given to `template` must remain valid. + // Since `Vec` keeps data on the heap, it remains valid when the `source` is moved. + // We MUST ensure we do not modify, drop or overwrite `source`. + let template = unsafe { template.transmute_lifetime() }; + + Self { source, template } + } +} + +struct DebugByteString<'a>(&'a [u8]); + +impl std::fmt::Debug for DebugByteString<'_> { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Ok(data) = std::str::from_utf8(self.0) { + write!(f, "b{:?}", data) + } else { + std::fmt::Debug::fmt(self.0, f) + } + } +} diff --git a/src/template/raw/expand.rs b/src/template/raw/expand.rs new file mode 100644 index 0000000..d6bb16a --- /dev/null +++ b/src/template/raw/expand.rs @@ -0,0 +1,61 @@ +use super::{Part, Template, Variable}; +use crate::error::{self, ExpandError}; +use crate::VariableMap; + +impl Template { + /// Expand the template into the output vector. 
+ pub fn expand<'a, M, F>( + &self, + output: &mut Vec, + source: &[u8], + variables: &'a M, + to_bytes: &F, + ) -> Result<(), ExpandError> + where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], + { + // Expand all parts one by one. + for part in &self.parts { + match part { + Part::Literal(x) => output.extend_from_slice(&source[x.range.clone()]), + Part::EscapedByte(x) => output.push(x.value), + Part::Variable(x) => x.expand(output, source, variables, to_bytes)?, + } + } + Ok(()) + } +} + +impl Variable { + /// Expand the variable into the output vector. + fn expand<'a, M, F>( + &self, + output: &mut Vec, + source: &[u8], + variables: &'a M, + to_bytes: &F, + ) -> Result<(), ExpandError> + where + M: VariableMap<'a> + ?Sized, + F: Fn(&M::Value) -> &[u8], + { + // Names were already checked to match a restricted set of valid characters, so they are guaranteed to be valid UTF-8. + let name = std::str::from_utf8(&source[self.name.clone()]).unwrap(); + + // If the variable appears in the map, use the value from the map. + if let Some(value) = variables.get(name) { + output.extend_from_slice(to_bytes(&value)); + Ok(()) + // Otherwise, use the default value, if given in the template. + } else if let Some(default) = &self.default { + default.expand(output, source, variables, to_bytes) + // Else, raise an error. + } else { + Err(ExpandError::NoSuchVariable(error::NoSuchVariable { + position: self.name.start, + name: name.to_owned(), + })) + } + } +} diff --git a/src/template/raw/mod.rs b/src/template/raw/mod.rs new file mode 100644 index 0000000..184d454 --- /dev/null +++ b/src/template/raw/mod.rs @@ -0,0 +1,58 @@ +mod expand; +mod parse; + +/// Raw template that doesn't track the original source. +/// +/// Internally, this keeps a bunch of offsets into the original source. +#[derive(Clone)] +pub struct Template { + /// The individual parts that make up the template. + parts: Vec, +} + +/// One piece of a parsed template. 
+#[derive(Clone)] +pub enum Part { + /// A literal string to be used verbatim from the original source. + Literal(Literal), + + /// An escaped byte. + EscapedByte(EscapedByte), + + /// A variable to be substituted at expansion time. + Variable(Variable), +} + +/// A literal string to be used verbatim from the original source. +#[derive(Clone)] +pub struct Literal { + /// The range of the literal in the original source. + /// + /// Will be copied verbatim to the output at expansion time. + /// + /// The literal can not contain any escaped characters or variables. + range: std::ops::Range, +} + +/// An escaped byte. +#[derive(Clone)] +pub struct EscapedByte { + /// The escaped byte. + /// + /// Will be copied to the output at expansion time. + value: u8, +} + +/// A variable to be substituted at expansion time. +#[derive(Clone)] +pub struct Variable { + /// The range in the source defining the name of the variable. + /// + /// Used for look-up in the variable map at expansion time. + name: std::ops::Range, + + /// Default value for the variable. + /// + /// Will be used if the variable does not appear in the variable map at expansion time. + default: Option