From 360b17eb2a73ceb36da230f45bca5a93a5988ab1 Mon Sep 17 00:00:00 2001
From: Josh McKinney <joshka@users.noreply.github.com>
Date: Wed, 18 Sep 2024 21:17:17 -0700
Subject: [PATCH 1/3] Add AsciiSet::EMPTY and impl ops::Add for AsciiSet

In RFCs, the sets of characters to percent-encode are often defined as
the union of multiple sets. This change adds an `EMPTY` constant to
`AsciiSet` and implements the `Add` trait for `AsciiSet` so that sets
can be combined with the `+` operator.

`AsciiSet` now derives `Debug`, `PartialEq`, and `Eq` so that sets can be
compared with `assert_eq!` in tests.
---
 percent_encoding/src/lib.rs | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs
index 10e0fc69..72d144bb 100644
--- a/percent_encoding/src/lib.rs
+++ b/percent_encoding/src/lib.rs
@@ -51,7 +51,7 @@ use alloc::{
     string::String,
     vec::Vec,
 };
-use core::{fmt, mem, slice, str};
+use core::{fmt, mem, ops, slice, str};
 
 /// Represents a set of characters or bytes in the ASCII range.
 ///
@@ -66,6 +66,7 @@ use core::{fmt, mem, slice, str};
 /// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
 /// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
 /// ```
+#[derive(Debug, PartialEq, Eq)]
 pub struct AsciiSet {
     mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
 }
@@ -77,6 +78,11 @@ const ASCII_RANGE_LEN: usize = 0x80;
 const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();
 
 impl AsciiSet {
+    /// An empty set.
+    pub const EMPTY: AsciiSet = AsciiSet {
+        mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
+    };
+
     /// Called with UTF-8 bytes rather than code points.
     /// Not used for non-ASCII bytes.
     const fn contains(&self, byte: u8) -> bool {
@@ -102,6 +108,18 @@ impl AsciiSet {
     }
 }
 
+impl ops::Add for AsciiSet {
+    type Output = Self;
+
+    fn add(self, other: Self) -> Self {
+        let mut mask = self.mask.clone();
+        for i in 0..mask.len() {
+            mask[i] |= other.mask[i];
+        }
+        AsciiSet { mask }
+    }
+}
+
 /// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
 ///
 /// Note that this includes the newline and tab characters, but not the space 0x20.
@@ -478,3 +496,16 @@ fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn add() {
+        let left = AsciiSet::EMPTY.add(b'A');
+        let right = AsciiSet::EMPTY.add(b'B');
+        let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
+        assert_eq!(left + right, expected);
+    }
+}

From b9f44f6a38d36be80af6c24207e8710756df6355 Mon Sep 17 00:00:00 2001
From: Josh McKinney <joshka@users.noreply.github.com>
Date: Wed, 18 Sep 2024 21:26:57 -0700
Subject: [PATCH 2/3] Implement ops::Not for AsciiSet

---
 percent_encoding/src/lib.rs | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs
index 72d144bb..34ad8f53 100644
--- a/percent_encoding/src/lib.rs
+++ b/percent_encoding/src/lib.rs
@@ -120,6 +120,15 @@ impl ops::Add for AsciiSet {
     }
 }
 
+impl ops::Not for AsciiSet {
+    type Output = Self;
+
+    fn not(self) -> Self {
+        let mask = self.mask.map(|chunk| !chunk);
+        AsciiSet { mask }
+    }
+}
+
 /// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
 ///
 /// Note that this includes the newline and tab characters, but not the space 0x20.
@@ -508,4 +517,12 @@ mod tests {
         let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
         assert_eq!(left + right, expected);
     }
+
+    #[test]
+    fn not() {
+        let set = AsciiSet::EMPTY.add(b'A').add(b'B');
+        let not_set = !set;
+        assert!(!not_set.contains(b'A'));
+        assert!(not_set.contains(b'C'));
+    }
 }

From 3407c406ef5f56b41d45a42d1ea9874f6acc93a4 Mon Sep 17 00:00:00 2001
From: Josh McKinney <joshka@users.noreply.github.com>
Date: Wed, 18 Sep 2024 21:49:18 -0700
Subject: [PATCH 3/3] Add const functions for negation / union of AsciiSet

---
 percent_encoding/src/lib.rs | 52 ++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/percent_encoding/src/lib.rs b/percent_encoding/src/lib.rs
index 34ad8f53..2213943b 100644
--- a/percent_encoding/src/lib.rs
+++ b/percent_encoding/src/lib.rs
@@ -106,17 +106,30 @@ impl AsciiSet {
         mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK));
         AsciiSet { mask }
     }
+
+    /// Return the union of this set and `other`.
+    pub const fn union(&self, other: Self) -> Self {
+        let mask = [
+            self.mask[0] | other.mask[0],
+            self.mask[1] | other.mask[1],
+            self.mask[2] | other.mask[2],
+            self.mask[3] | other.mask[3],
+        ];
+        AsciiSet { mask }
+    }
+
+    /// Return the complement of the set: every ASCII byte that is not in `self`.
+    pub const fn complement(&self) -> Self {
+        let mask = [!self.mask[0], !self.mask[1], !self.mask[2], !self.mask[3]];
+        AsciiSet { mask }
+    }
 }
 
 impl ops::Add for AsciiSet {
     type Output = Self;
 
     fn add(self, other: Self) -> Self {
-        let mut mask = self.mask.clone();
-        for i in 0..mask.len() {
-            mask[i] |= other.mask[i];
-        }
-        AsciiSet { mask }
+        self.union(other)
     }
 }
 
@@ -124,8 +137,7 @@ impl ops::Not for AsciiSet {
     type Output = Self;
 
     fn not(self) -> Self {
-        let mask = self.mask.map(|chunk| !chunk);
-        AsciiSet { mask }
+        self.complement()
     }
 }
 
@@ -511,7 +523,7 @@ mod tests {
     use super::*;
 
     #[test]
-    fn add() {
+    fn add_op() {
         let left = AsciiSet::EMPTY.add(b'A');
         let right = AsciiSet::EMPTY.add(b'B');
         let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
@@ -519,10 +531,32 @@ mod tests {
     }
 
     #[test]
-    fn not() {
+    fn not_op() {
         let set = AsciiSet::EMPTY.add(b'A').add(b'B');
         let not_set = !set;
         assert!(!not_set.contains(b'A'));
         assert!(not_set.contains(b'C'));
     }
+
+    /// This test ensures that we can get the union of two sets as a constant value, which is
+    /// useful for defining sets in a modular way.
+    #[test]
+    fn union() {
+        const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
+        const B: AsciiSet = AsciiSet::EMPTY.add(b'B');
+        const UNION: AsciiSet = A.union(B);
+        const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
+        assert_eq!(UNION, EXPECTED);
+    }
+
+    /// This test ensures that we can get the complement of a set as a constant value, which is
+    /// useful for defining sets in a modular way.
+    #[test]
+    fn complement() {
+        const BOTH: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
+        const COMPLEMENT: AsciiSet = BOTH.complement();
+        assert!(!COMPLEMENT.contains(b'A'));
+        assert!(!COMPLEMENT.contains(b'B'));
+        assert!(COMPLEMENT.contains(b'C'));
+    }
 }