Skip to content

Commit 109e2f5

Browse files
committed
encode space to '%20' as per the URL Standard
Previously, the space character was always encoded as '+'. This is wrong, as the URL Standard [0] specifies '%20' as the default. A second function, `byte_serialize_space_as_plus`, has been introduced as well; it replicates the old behavior and converts spaces to '+'. Note that this changes the default behavior and could lead to bugs in callers that rely on the old output. [0]: https://url.spec.whatwg.org/#string-percent-encode-after-encoding Fixes: #927 Fixes: #888 Signed-off-by: Gabriel Goller <[email protected]>
1 parent de947ab commit 109e2f5

File tree

2 files changed

+53
-5
lines changed

2 files changed

+53
-5
lines changed

form_urlencoded/src/lib.rs

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,33 @@ impl<'a> Iterator for ParseIntoOwned<'a> {
116116

117117
/// The [`application/x-www-form-urlencoded` byte serializer](
118118
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
119+
/// Converts spaces (b' ') to the percent-encoded equivalent ("%20").
119120
///
120121
/// Return an iterator of `&str` slices.
121122
pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
122-
ByteSerialize { bytes: input }
123+
ByteSerialize {
124+
bytes: input,
125+
space_as_plus: false,
126+
}
127+
}
128+
129+
/// The [`application/x-www-form-urlencoded` byte serializer](
130+
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
131+
/// Converts spaces (b' ') to plus signs (b'+').
132+
///
133+
/// Return an iterator of `&str` slices.
134+
pub fn byte_serialize_space_as_plus(input: &[u8]) -> ByteSerialize<'_> {
135+
ByteSerialize {
136+
bytes: input,
137+
space_as_plus: true,
138+
}
123139
}
124140

125141
/// Return value of `byte_serialize()`.
126142
#[derive(Debug)]
127143
pub struct ByteSerialize<'a> {
128144
bytes: &'a [u8],
145+
space_as_plus: bool,
129146
}
130147

131148
fn byte_serialized_unchanged(byte: u8) -> bool {
@@ -139,7 +156,7 @@ impl<'a> Iterator for ByteSerialize<'a> {
139156
if let Some((&first, tail)) = self.bytes.split_first() {
140157
if !byte_serialized_unchanged(first) {
141158
self.bytes = tail;
142-
return Some(if first == b' ' {
159+
return Some(if first == b' ' && self.space_as_plus {
143160
"+"
144161
} else {
145162
percent_encode_byte(first)
@@ -337,7 +354,7 @@ impl<'a, T: Target> Serializer<'a, T> {
337354
/// .append_pair("foo", "bar & baz")
338355
/// .append_pair("saison", "Été+hiver")
339356
/// .finish();
340-
/// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
357+
/// assert_eq!(encoded, "foo=bar%20%26%20baz&saison=%C3%89t%C3%A9%2Bhiver");
341358
/// ```
342359
///
343360
/// Panics if called more than once.
@@ -428,3 +445,34 @@ pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
428445
}
429446

430447
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
448+
449+
#[cfg(test)]
450+
mod tests {
451+
use alloc::string::String;
452+
453+
use crate::{byte_serialize, byte_serialize_space_as_plus};
454+
455+
#[test]
456+
fn byte_serializer() {
457+
let in_1 = "c ool/org";
458+
let out_1 = "c%20ool%2Forg";
459+
460+
let in_2 = "a🔒nother&bu=ck?et";
461+
let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet";
462+
463+
assert_eq!(byte_serialize(in_1.as_bytes()).collect::<String>(), out_1);
464+
assert_eq!(byte_serialize(in_2.as_bytes()).collect::<String>(), out_2);
465+
}
466+
467+
#[test]
468+
fn byte_serializer_space_as_plus() {
469+
let in_1 = "c ool/org";
470+
let out_1 = "c+ool%2Forg";
471+
472+
let in_2 = "a🔒nother&bu=ck?et ";
473+
let out_2 = "a%F0%9F%94%92nother%26bu%3Dck%3Fet+";
474+
475+
assert_eq!(byte_serialize_space_as_plus(in_1.as_bytes()).collect::<String>(), out_1);
476+
assert_eq!(byte_serialize_space_as_plus(in_2.as_bytes()).collect::<String>(), out_2);
477+
}
478+
}

url/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,9 +1623,9 @@ impl Url {
16231623
/// .clear()
16241624
/// .append_pair("foo", "bar & baz")
16251625
/// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1626-
/// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1626+
/// assert_eq!(url.query(), Some("foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver"));
16271627
/// assert_eq!(url.as_str(),
1628-
/// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1628+
/// "https://example.net/?foo=bar%20%26%20baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
16291629
/// # Ok(())
16301630
/// # }
16311631
/// # run().unwrap();

0 commit comments

Comments
 (0)