theintrance · BayernMuller · Dec 28, 2025 · Dec 28, 2025 · Dec 28, 2025 · Dec 28, 2025
diff --git a/src/dom/dom_indexer.cc b/src/dom/dom_indexer.cc
@@ -37,4 +37,9 @@ std::optional<NodeList> DOMIndexer::GetNodesByClass(std::string_view class_name)
   return it != class_index_.end() ? std::make_optional(it->second) : std::nullopt;
 }
 
+std::optional<NodeList> DOMIndexer::GetNodesByAttribute(std::string_view attribute_name) const {
+  auto it = attr_index_.find(std::string(attribute_name));  // builds a std::string key from the view
+  return it != attr_index_.end() ? std::make_optional(it->second) : std::nullopt;  // copy, or nullopt
+}
+
 }  // namespace arboris
diff --git a/src/dom/dom_indexer.hpp b/src/dom/dom_indexer.hpp
@@ -30,12 +30,15 @@ class DOMIndexer {
   [[nodiscard]] NodePtr GetNodeById(std::string_view id) const;
   [[nodiscard]] std::optional<NodeList> GetNodesByTag(Tag tag) const;
   [[nodiscard]] std::optional<NodeList> GetNodesByClass(std::string_view class_name) const;
+  [[nodiscard]] std::optional<NodeList> GetNodesByAttribute(std::string_view attribute_name) const;
 
  private:
   // TODO(team): consider using std::list instead of std::vector for indexes
   std::unordered_map<std::string, NodePtr> id_index_;
   std::unordered_map<Tag, NodeList> tag_index_;
   std::unordered_map<std::string, NodeList> class_index_;
+
+  // TODO(team): consider indexing by value instead of name
   std::unordered_map<std::string, NodeList> attr_index_;
 };
 

diff --git a/src/dom/dom_query.cc b/src/dom/dom_query.cc
@@ -8,17 +8,31 @@
 
 #include <optional>
 #include <string>
+#include <limits>
 #include <vector>
+#include <utility>
+
+#include "utils/set_utils.hpp"
 
 namespace arboris {
 
 std::optional<DOMQuery> DOMQuery::Find(const QueryOptions& options) const {
   // TODO(team): Implement this
-  return DOMQuery(root_, dom_indexer_);
+  auto candidates = searchCandidatesFromIndexer(options);
+  if (candidates.empty()) {
+    return std::nullopt;
+  }
+
+  for (const auto& candidate : candidates) {
+    if (matchAllConditions(candidate, options)) {
+      return DOMQuery(*candidate, dom_indexer_);
+    }
+  }
+
+  return std::nullopt;
 }
 
 std::optional<DOMQuery> DOMQuery::Find(const std::string& id) const {
-  // TODO(team): Implement this
   NodePtr node = dom_indexer_.get().GetNodeById(id);
   if (node) {
     return DOMQuery(*node, dom_indexer_);
@@ -30,9 +44,86 @@ std::optional<DOMQuery> DOMQuery::Find(const std::string& id) const {
 std::vector<DOMQuery> DOMQuery::FindAll(const QueryOptions& options) const {
   std::vector<DOMQuery> ret;
 
-  const auto& tag_filtered_list = dom_indexer_.get().GetNodesByTag(options.tag.value());
-
+  auto candidates = searchCandidatesFromIndexer(options);  // shortest list among the queried keys
+  for (const auto& candidate : candidates) {
+    if (matchAllConditions(candidate, options)) {  // the list was picked by one key only; re-check all
+      ret.push_back(DOMQuery(*candidate, dom_indexer_));
+    }
+  }
   return ret;
 }
 
+// Picks the list of nodes that Find/FindAll should scan for `options`.
+//
+// Each index key named by the query (its tag, every class, every attribute
+// name) is looked up in the indexer and the shortest list found is returned,
+// presumably so that the follow-up matchAllConditions pass visits as few nodes
+// as possible. The list is chosen by a single key and is not filtered by the
+// other conditions, so callers still have to run matchAllConditions on it.
+//
+// Returns an empty list when the query names no tag, class or attribute (for
+// example a text-only query), when none of the named keys has an index entry,
+// or when the shortest list found is itself empty.
+NodeList DOMQuery::searchCandidatesFromIndexer(const QueryOptions& options) const {
+  const DOMIndexer& indexer = dom_indexer_.get();
+  std::size_t smallest_size = std::numeric_limits<std::size_t>::max();
+  NodeList smallest;
+
+  // Keeps `found` when it exists and is strictly shorter than the best so far.
+  const auto keep_if_smaller = [&smallest_size, &smallest](std::optional<NodeList> found) {
+    if (found.has_value() && found->size() < smallest_size) {
+      smallest_size = found->size();
+      smallest = std::move(*found);
+    }
+  };
+
+  if (options.tag.has_value()) {
+    keep_if_smaller(indexer.GetNodesByTag(options.tag.value()));
+  }
+
+  if (options.classes.has_value()) {
+    for (const auto& class_name : *options.classes) {
+      keep_if_smaller(indexer.GetNodesByClass(class_name));
+    }
+  }
+
+  if (options.attributes.has_value()) {
+    // Only the attribute name is an index key; values are compared later.
+    for (const auto& [attribute_name, _] : options.attributes.value()) {
+      keep_if_smaller(indexer.GetNodesByAttribute(attribute_name));
+    }
+  }
+
+  // An empty `smallest` is already the "no candidates" answer.
+  return smallest;
+}
+
+bool DOMQuery::matchAllConditions(const NodePtr& node, const QueryOptions& options) const {
+  if (!isSubNode(node)) {  // candidates come from the shared indexer; drop nodes outside root_
+    return false;
+  }
+
+  if (options.tag && node->tag() != options.tag.value()) {  // unset conditions are skipped
+    return false;
+  }
+
+  if (options.classes && !IsSubset(options.classes.value(), node->classes())) {  // all classes required
+    return false;
+  }
+
+  if (options.attributes && !IsSubset(options.attributes.value(), node->attributes())) {  // names + values
+    return false;
+  }
+  // TODO(team): Implement text condition matching (options.text is currently ignored)
+  return true;
+}
+
+bool DOMQuery::isSubNode(const NodePtr& node) const {
+  const uint32_t node_in = node->in();  // presumably DFS entry/exit stamps -- TODO confirm
+  const uint32_t node_out = node->out();
+  const uint32_t root_in = root_.get().in();
+  const uint32_t root_out = root_.get().out();
+  return node_in >= root_in && node_out <= root_out;  // inclusive bounds, so root_ itself passes
+}
+
 }  // namespace arboris
diff --git a/src/dom/dom_query.hpp b/src/dom/dom_query.hpp
@@ -39,8 +39,12 @@ class DOMQuery {
   std::vector<DOMQuery> FindAll(const QueryOptions& options) const;
 
  private:
-    std::reference_wrapper<const TagNode> root_;
-    std::reference_wrapper<const DOMIndexer> dom_indexer_;
+  NodeList searchCandidatesFromIndexer(const QueryOptions& options) const;
+  bool matchAllConditions(const NodePtr& node, const QueryOptions& options) const;
+  bool isSubNode(const NodePtr& node) const;  // defined out of line in dom_query.cc, so not inline
+
+  std::reference_wrapper<const TagNode> root_;
+  std::reference_wrapper<const DOMIndexer> dom_indexer_;
 };
 
 }  // namespace arboris

diff --git a/src/dom/tag_node.hpp b/src/dom/tag_node.hpp
@@ -10,6 +10,7 @@
 #include <memory>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -29,11 +30,11 @@ class TagNode final : public BaseNode {
     return children_;
   }
 
-  [[nodiscard]] const std::unordered_map<std::string, std::string>& attributes() const noexcept {
+  [[nodiscard]] const AttributeMap& attributes() const noexcept {  // attribute name -> set of values
     return html_token_.attributes;
   }
 
-  [[nodiscard]] const std::vector<std::string>& classes() const noexcept {
+  [[nodiscard]] const ClassSet& classes() const noexcept {  // unique class names, unordered
     return html_token_.classes;
   }
 

diff --git a/src/utils/html_tokens.hpp b/src/utils/html_tokens.hpp
@@ -9,23 +9,27 @@
 
 #include <string>
 #include <string_view>
-#include <unordered_map>
 #include <vector>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "utils/tag.hpp"
 #include "utils/tokens.hpp"
 
 namespace arboris {
 
+using AttributeMap = std::unordered_map<std::string, std::unordered_set<std::string>>;  // name -> values
+using ClassSet = std::unordered_set<std::string>;  // unique class names, unordered
+
 struct BaseHtmlToken : public BaseToken {};
 
 struct HtmlToken : public BaseHtmlToken {
   Tag tag = Tag::kUnknown;
   bool is_void_tag = false;
 
   // TODO(team): Consider using string_views with an external string pool
-  std::unordered_map<std::string, std::string> attributes;
-  std::vector<std::string> classes;
+  AttributeMap attributes;  // attribute name -> set of values for that attribute
+  ClassSet classes;  // unique class names; iteration order is unspecified
   std::string id;
 };
 

diff --git a/src/utils/query_options.hpp b/src/utils/query_options.hpp
@@ -8,13 +8,14 @@
 #define SRC_UTILS_QUERY_OPTIONS_HPP_
 
 #include <optional>
-#include <vector>
+#include <unordered_set>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <functional>
 
 #include "utils/tag.hpp"
+#include "utils/html_tokens.hpp"
 
 namespace arboris {
 
@@ -40,8 +41,8 @@ class TextQueryCondition {
 
 struct QueryOptions {
   std::optional<Tag> tag;
-  std::optional<std::vector<std::string>> classes;
-  std::optional<std::vector<std::pair<std::string, std::string>>> attributes;
+  std::optional<ClassSet> classes;  // a matching node must carry every listed class
+  std::optional<AttributeMap> attributes;  // name -> values a matching node must all carry
   std::optional<TextQueryCondition> text;
 };
 

diff --git a/src/utils/set_utils.hpp b/src/utils/set_utils.hpp
@@ -0,0 +1,48 @@
+/*
+ *   Copyright 2025 Team Arboris
+ *   Licensed under the Apache License, Version 2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+#ifndef SRC_UTILS_SET_UTILS_HPP_
+#define SRC_UTILS_SET_UTILS_HPP_
+
+#include <unordered_map>
+#include <unordered_set>
+
+namespace arboris {
+
+template <typename T>
+bool IsSubset(const std::unordered_set<T>& subset,
+              const std::unordered_set<T>& super_set) {
+  if (subset.size() > super_set.size()) {  // a larger set cannot fit inside a smaller one
+    return false;
+  }
+
+  for (const auto& x : subset) {
+    // std::unordered_set::contains requires C++20 or newer
+    if (!super_set.contains(x)) {
+      return false;
+    }
+  }
+  return true;  // also reached when subset is empty
+}
+
+template <typename T, typename U>
+bool IsSubset(const std::unordered_map<T, std::unordered_set<U>>& subset,
+              const std::unordered_map<T, std::unordered_set<U>>& super_set) {
+  for (const auto& [key, value_set] : subset) {
+    auto it = super_set.find(key);
+    if (it == super_set.end()) {  // key is missing from super_set
+      return false;
+    }
+    if (!IsSubset(value_set, it->second)) {  // an empty value_set only needs the key to exist
+      return false;
+    }
+  }
+  return true;  // an empty subset map is contained in any map
+}
+
+}  // namespace arboris
+
+#endif  // SRC_UTILS_SET_UTILS_HPP_