WIP - add worker_loop

marcocapozzoli · marcocapozzoli · commit dff09ac55a25 · 2025-11-18T12:17:33.000-03:00
diff --git a/src/agents/atomdb_broker/AtomDBProcessor.cc b/src/agents/atomdb_broker/AtomDBProcessor.cc
@@ -27,8 +27,8 @@ void AtomDBProcessor::run_command(shared_ptr<BusCommandProxy> proxy) {
     lock_guard<mutex> semaphore(this->query_threads_mutex);
     auto atomdb_proxy = dynamic_pointer_cast<AtomDBProxy>(proxy);
     string thread_id = "thread<" + proxy->my_id() + "_" + std::to_string(proxy->get_serial()) + ">";
-    LOG_INFO("Starting new thread: " << thread_id << " to run command: <" << proxy->get_command()
-                                      << ">");
+    LOG_DEBUG("Starting new thread: " << thread_id << " to run command: <" << proxy->get_command()
+                                     << ">");
     if (this->query_threads.find(thread_id) != this->query_threads.end()) {
         Utils::error("Invalid thread id: " + thread_id);
     } else {
@@ -45,7 +45,7 @@ void AtomDBProcessor::thread_process_one_query(shared_ptr<StoppableThread> monit
         proxy->untokenize(proxy->args);
         string command = proxy->get_command();
         if (command == ServiceBus::ATOMDB) {
-            LOG_INFO("Processing ATOMDB command...");
+            LOG_DEBUG("Processing ATOMDB command...");
             while (!proxy->is_aborting()) {
                 Utils::sleep();
             }
diff --git a/src/agents/atomdb_broker/AtomDBProxy.cc b/src/agents/atomdb_broker/AtomDBProxy.cc
@@ -1,5 +1,7 @@
 #include "AtomDBProxy.h"
 
+#include <numeric>
+
 #include "AtomDBSingleton.h"
 #include "BaseProxy.h"
 #include "Link.h"
@@ -17,22 +19,41 @@ using namespace commons;
 
 // -------------------------------------------------------------------------------------------------
 // Static constants
+// Super-batches that are ready to be written to the AtomDB by the worker threads.
+// Declared static so the name has internal linkage and stays out of other translation units.
+// NOTE(review): this queue is a single file-level object, while the mutex used to guard it
+// (queue_mutex) is a per-instance member, so two live AtomDBProxy instances would access it
+// without mutual exclusion. Consider turning it into a class member - TODO confirm.
+static queue<vector<Atom*>> ready_batches;
 
-const size_t AtomDBProxy::BATCH_SIZE = 5000;
+// Number of accumulated atoms at which add_work() creates a super-batch.
+const size_t AtomDBProxy::BATCH_SIZE = 100000;
+// Number of worker threads started by each AtomDBProxy constructor.
+const size_t AtomDBProxy::NUM_THREADS = 10;
 
 // Proxy Commands
 string AtomDBProxy::ADD_ATOMS = "add_atoms";
+string AtomDBProxy::FLUSH_ATOMS = "flush_atoms";
 
 // -------------------------------------------------------------------------------------------------
 // Constructor and destructor
 
 AtomDBProxy::AtomDBProxy() : BaseProxy() {
     this->command = ServiceBus::ATOMDB;
     this->atomdb = AtomDBSingleton::get_instance();
+
+    for (int i = 0; i < NUM_THREADS; ++i) {
+        this->workers.emplace_back(&AtomDBProxy::worker_loop, this);
+    }
 }
 
+/**
+ * Queues any atoms still pending, stops the worker threads, waits for them to finish and
+ * then aborts the proxy.
+ */
 AtomDBProxy::~AtomDBProxy() {
     LOG_INFO("Shutdown AtomDBProxy...");
+    // Hand whatever is still sitting in work_queue to the workers before asking them to stop.
+    flush_atoms();
+    {
+        // Written under queue_mutex because worker_loop() reads the flag under the same mutex.
+        unique_lock<mutex> lock(this->queue_mutex);
+        this->stop_processing = true;
+    }
+    // Wake every waiting worker so that each one can observe stop_processing.
+    this->queue_condition.notify_all();
+
+    // A worker returns only when stop_processing is set and ready_batches is empty, so joining
+    // also waits for the batches that are already queued.
+    for (thread& worker : this->workers) {
+        if (worker.joinable()) {
+            worker.join();
+        }
+    }
     this->abort();
 }
 
@@ -85,41 +106,15 @@ bool AtomDBProxy::from_remote_peer(const string& command, const vector<string>&
     } else if (command == AtomDBProxy::ADD_ATOMS) {
         handle_add_atoms(args);
         return true;
+    } else if (command == AtomDBProxy::FLUSH_ATOMS) {
+        flush_atoms();
+        return true;
     } else {
         Utils::error("Invalid AtomDBProxy command: <" + command + ">");
         return false;
     }
 }
 
-void AtomDBProxy::handle_add_atoms(const vector<string>& tokens) {
-    vector<Atom*> atoms;
-    try {
-        atoms = build_atoms_from_tokens(tokens);
-        LOG_INFO("Processing " << atoms.size() << " atoms...");
-
-        if (atoms.empty()) {
-            LOG_INFO("No atoms were built from tokens. Nothing to process.");
-            return;
-        }
-
-        this->atomdb->add_atoms(atoms, false, true);
-
-        LOG_DEBUG("Cleaning up " << atoms.size() << " atom pointers after successful processing.");
-        for (Atom* atom : atoms) {
-            delete atom;
-        }
-        atoms.clear();
-
-        LOG_INFO("Finished processing all atoms.");
-
-    } catch (const exception& e) {
-        LOG_ERROR("Error processing atoms: " << e.what());
-        for (Atom* atom : atoms) {
-            delete atom;
-        }
-    }
-}
-
 vector<Atom*> AtomDBProxy::build_atoms_from_tokens(const vector<string>& tokens) {
     vector<Atom*> atoms;
     string current;
@@ -148,3 +143,117 @@ vector<Atom*> AtomDBProxy::build_atoms_from_tokens(const vector<string>& tokens)
 
     return atoms;
 }
+
+/**
+ * Handles an ADD_ATOMS command: builds atoms from the received tokens and queues them.
+ *
+ * Nothing is written to the AtomDB here; the atoms are handed to add_work(), which takes
+ * over the vector.
+ *
+ * @param tokens Tokens received from the remote peer, as passed by from_remote_peer().
+ */
+void AtomDBProxy::handle_add_atoms(const vector<string>& tokens) {
+    // Failures are logged rather than propagated so that a bad message cannot unwind
+    // through from_remote_peer().
+    try {
+        vector<Atom*> atoms = build_atoms_from_tokens(tokens);
+        LOG_INFO("Received " << atoms.size() << " atoms from peer " << this->peer_id());
+        add_work(move(atoms));
+    } catch (const exception& e) {
+        LOG_ERROR("Error processing atoms: " << e.what());
+    }
+}
+
+// Returns the number of atoms held across all batches in `pending`.
+static size_t count_pending_atoms(const vector<vector<Atom*>>& pending) {
+    return accumulate(
+        pending.begin(), pending.end(), size_t{0}, [](size_t sum, const auto& batch) {
+            return sum + batch.size();
+        });
+}
+
+// Concatenates every batch in `pending` (holding `total_atoms` atoms in total) into a single
+// vector and leaves `pending` empty.
+static vector<Atom*> merge_pending_batches(vector<vector<Atom*>>& pending, size_t total_atoms) {
+    vector<Atom*> merged;
+    merged.reserve(total_atoms);
+    for (const auto& batch : pending) {
+        merged.insert(merged.end(), batch.begin(), batch.end());
+    }
+    pending.clear();
+    return merged;
+}
+
+/**
+ * Merges everything pending in work_queue into one batch, regardless of BATCH_SIZE, and
+ * queues it for the worker threads. Only logs when work_queue is empty.
+ *
+ * The batch is queued, not processed: this method returns before a worker has written it.
+ */
+void AtomDBProxy::flush_atoms() {
+    unique_lock<mutex> lock(this->queue_mutex);
+
+    if (this->work_queue.empty()) {
+        LOG_INFO("[Flush] Received flush command, but accumulator is empty. Nothing to do.");
+        return;
+    }
+
+    LOG_INFO("[Flush] Received flush command. Flushing " << this->work_queue.size()
+                                                         << " remaining batches from accumulator.");
+
+    const size_t total_remaining = count_pending_atoms(this->work_queue);
+    ready_batches.push(merge_pending_batches(this->work_queue, total_remaining));
+
+    // Release the mutex before notifying so the woken worker does not block on it right away.
+    lock.unlock();
+    this->queue_condition.notify_one();
+}
+
+/**
+ * Stores a batch of atoms in work_queue and, once at least BATCH_SIZE atoms are pending,
+ * merges all pending batches into one super-batch queued for the worker threads.
+ *
+ * @param atoms Atoms to be queued; an empty vector is ignored.
+ */
+void AtomDBProxy::add_work(vector<Atom*> atoms) {
+    if (atoms.empty()) return;
+    unique_lock<mutex> lock(this->queue_mutex);
+    this->work_queue.push_back(move(atoms));
+
+    // NOTE(review): the total is recomputed over every pending batch on each call; a running
+    // counter member would avoid the repeated scan when many small batches arrive.
+    const size_t total_in_queue = count_pending_atoms(this->work_queue);
+    if (total_in_queue < BATCH_SIZE) return;
+
+    LOG_DEBUG("[Accumulator] Batch target reached. Total: " << total_in_queue
+                                                           << ". Creating super-batch.");
+
+    ready_batches.push(merge_pending_batches(this->work_queue, total_in_queue));
+
+    // Release the mutex before notifying so the woken worker does not block on it right away.
+    lock.unlock();
+    this->queue_condition.notify_one();
+}
+
+/**
+ * Body of each worker thread: waits for a ready batch, writes it through atomdb->add_atoms()
+ * and frees its atoms. Returns once stop_processing is set and no ready batch is left.
+ */
+void AtomDBProxy::worker_loop() {
+    while (true) {
+        vector<Atom*> batch_to_process;
+        {
+            unique_lock<mutex> lock(this->queue_mutex);
+            this->queue_condition.wait(
+                lock, [this] { return this->stop_processing || !ready_batches.empty(); });
+            // Keep draining after a stop request; leave only when nothing is left to write.
+            if (this->stop_processing && ready_batches.empty()) return;
+            batch_to_process = move(ready_batches.front());
+            ready_batches.pop();
+        }
+        // The write happens outside the lock so that other workers can dequeue meanwhile.
+        LOG_INFO("[Thread " << this_thread::get_id() << "] Processing batch with "
+                            << batch_to_process.size() << " atoms.");
+        try {
+            this->atomdb->add_atoms(batch_to_process, false, true);
+            LOG_INFO("[Thread " << this_thread::get_id() << "] batch processed successfully.");
+        } catch (const exception& e) {
+            LOG_ERROR("Error processing batch: " << e.what());
+        }
+        // The dequeued pointers belong to this thread: free them whether or not the write
+        // succeeded, otherwise every processed batch leaks its atoms.
+        for (Atom* atom : batch_to_process) {
+            delete atom;
+        }
+    }
+}
diff --git a/src/agents/atomdb_broker/AtomDBProxy.h b/src/agents/atomdb_broker/AtomDBProxy.h
@@ -1,8 +1,10 @@
 #pragma once
 
+#include <condition_variable>
 #include <memory>
 #include <mutex>
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "Atom.h"
@@ -28,6 +30,7 @@ class AtomDBProxy : public BaseProxy {
     // ---------------------------------------------------------------------------------------------
     // Proxy Commands
     static string ADD_ATOMS;
+    static string FLUSH_ATOMS;
 
     // ---------------------------------------------------------------------------------------------
     // Constructor and destructor
@@ -114,11 +117,22 @@ class AtomDBProxy : public BaseProxy {
      * reported back to the peer.
      */
     void handle_add_atoms(const vector<string>& args);
+    /**
+     * Merges every batch pending in work_queue into a single batch and queues it for the
+     * worker threads. Only logs a message when work_queue is empty.
+     */
+    void flush_atoms();
+    /**
+     * Stores a batch of atoms in work_queue; once at least BATCH_SIZE atoms are pending, all
+     * pending batches are merged into one batch that is queued for the worker threads.
+     *
+     * @param atoms Atoms to be queued; an empty vector is ignored.
+     */
+    void add_work(vector<Atom*> atoms);
+    /**
+     * Loop run by each thread in workers: waits for a ready batch and passes it to
+     * atomdb->add_atoms(). Returns once stop_processing is set and no ready batch is left.
+     */
+    void worker_loop();
 
     mutex api_mutex;
     shared_ptr<AtomDB> atomdb;
 
     static const size_t BATCH_SIZE;
+    // Number of worker threads started by the constructor.
+    static const size_t NUM_THREADS;
+
+    // Threads running worker_loop(); joined in the destructor.
+    vector<thread> workers;
+    // Set by the destructor to ask the workers to exit; read and written under queue_mutex.
+    bool stop_processing = false;
+
+    // Batches received through add_work() that have not yet been merged into a ready batch.
+    vector<vector<Atom*>> work_queue;
+    // Guards work_queue and stop_processing; also held while this instance pushes to or pops
+    // from the queue of ready batches.
+    mutex queue_mutex;
+    // Notified when a batch becomes ready (notify_one) and on shutdown (notify_all).
+    condition_variable queue_condition;
 };
 
 }  // namespace atomdb_broker