Changes from all commits (52 commits)
f6b1ee9
Loop limits (#639)
MarcMcIntosh Mar 26, 2025
648c323
feat: showing a placeholder message to the user if history is empty
alashchev17 Mar 26, 2025
178b7f7
fix: intercepting currentUsage when choices are empty array and upser…
alashchev17 Mar 27, 2025
21ea232
refactor: change telemetry from macro to middleware
MDario123 Mar 25, 2025
2e5104b
refactor: /ping and /caps handlers
MDario123 Feb 28, 2025
787a8c7
refactor: code_completion_post_validate don't consume CodeCompletionPost
MDario123 Mar 25, 2025
d2ff9e5
chore: remove ScratchError.to_response unused function
MDario123 Feb 28, 2025
710ff88
fix: add back "client will see" message
MDario123 Mar 25, 2025
01046ca
doc: clarification about extension in scratch error into_response
MDario123 Mar 25, 2025
64f8f02
wip: remove attach file checkbox. (#644)
MarcMcIntosh Mar 28, 2025
95a753e
Add timeout to aiohttp session and handle JSON parsing errors
JegernOUTT Mar 28, 2025
fec3cea
Extend gradient_type to support new range and behavior
JegernOUTT Mar 28, 2025
be460ee
Add logging for context and token limits in history pruning function.
JegernOUTT Mar 28, 2025
e0c1d2e
Attach preview (#648)
MarcMcIntosh Mar 31, 2025
7503712
change the ui for compressed messages and fix shared loading states.
MarcMcIntosh Mar 31, 2025
42360a3
fix: unused variable.
MarcMcIntosh Mar 31, 2025
74b5256
fix: linting issues
MarcMcIntosh Mar 31, 2025
6c45a8f
fix: getting fresh port value on each polling iteration & dispatching…
alashchev17 Apr 1, 2025
21884c0
chore: removed empty line & excessive comment
alashchev17 Apr 1, 2025
59f4eaa
fix: better ping polling control
alashchev17 Apr 1, 2025
8976546
fix: simplified pingApi
alashchev17 Apr 2, 2025
3f8b742
fix: set body class when appearance changes. (#653)
MarcMcIntosh Apr 2, 2025
26347ee
3rdparty setup 09 03 25 (#657)
mitya52 Apr 2, 2025
56041ee
fix: merge usage among several messages
MDario123 Apr 1, 2025
ec98d60
move model record logic to model assigner (#658)
mitya52 Apr 4, 2025
e9ea369
fix: kill subprocesses and childs from them if future is cancelled, s…
humbertoyusta Apr 4, 2025
81ab8c6
fix: update tokio-tar by changing it to astral-tokio-tar
MDario123 Mar 26, 2025
e0c9ca5
feat: Dockerfile for staticly linked release build
MDario123 Apr 1, 2025
5b8239d
gemini 25 pro and chatgpt 4o
mitya52 Apr 7, 2025
8884a52
New models 07 04 25 (#664)
mitya52 Apr 7, 2025
2e3f46e
fix: increase python lsp runner buffer limit
MDario123 Apr 7, 2025
79436ee
fix: raise limit to 64MB
MDario123 Apr 7, 2025
dd729b8
fix: use ArchiveBuilder to preserve permissions
MDario123 Apr 8, 2025
d00fd8f
fix: limit the number of tokens a chat can use.
MarcMcIntosh Mar 25, 2025
2fed982
fix: linter errors.
MarcMcIntosh Mar 25, 2025
051dbdb
fix: dispatch information callout and disable input.
MarcMcIntosh Mar 25, 2025
8852b67
refactor: usage total.
MarcMcIntosh Mar 25, 2025
22c865a
fix: specify custom address url for the container
humbertoyusta Apr 8, 2025
578035c
fix: remove usage limits ui.
MarcMcIntosh Apr 9, 2025
ef87ecf
enable compression button
MarcMcIntosh Apr 9, 2025
852b048
add forceReload event on any tool result. (#673)
MarcMcIntosh Apr 14, 2025
f972c09
fix: tour link colour in light mode (#676)
MarcMcIntosh Apr 14, 2025
e4ff8f7
fix: don't fail on confirmation to send error to the model
humbertoyusta Mar 27, 2025
2fe916e
refact, starcoder2, deepseek-coder deprecation (#674)
mitya52 Apr 14, 2025
eae8358
n_ctx from model assigner (#677)
mitya52 Apr 14, 2025
ea87078
add gpt41 to known models (#679)
mitya52 Apr 15, 2025
33ed5f9
fix: cat tool in windows abs paths, try last colon, and not fail if t…
humbertoyusta Apr 14, 2025
4132f31
recompute rag completion tokens if out-of-budget (#678)
mitya52 Apr 15, 2025
0757dbf
fix: more robust json parse for follow ups and locate
humbertoyusta Apr 14, 2025
7088020
Update supported-models.md
bystrakowa Apr 29, 2025
e9db2aa
added the BYOK and Usage Based Pricing Pages
yplilya May 5, 2025
b1aa07d
Corrected Pricing for models in docs
yplilya May 5, 2025
565 changes: 565 additions & 0 deletions docs/.astro/types.d.ts

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions docs/astro.config.mjs
@@ -62,6 +62,13 @@ export default defineConfig({
'aria-label': 'Get started with Refact'
}
},
{
label: 'Usage Based Pricing',
link: '/guides/usage-based-pricing/',
attrs: {
'aria-label': 'Learn about Usage Based Pricing'
}
},
{
label: 'Installation',
collapsed: true,
1 change: 1 addition & 0 deletions docs/public/.keep
@@ -0,0 +1 @@
(placeholder to create public directory)
Binary file added docs/public/assets/byok_1.png
Binary file added docs/public/assets/byok_2.png
Binary file added docs/src/assets/byok_1.png
Binary file added docs/src/assets/byok_2.png
24 changes: 11 additions & 13 deletions docs/src/content/docs/byok.md
@@ -3,22 +3,20 @@ title: "Bring Your Own Key (BYOK)"
---

## Introduction
Bring Your Own Key (BYOK) allows users to specify their API keys and select models for chat, completion, and embedding tasks across different AI platforms. This feature enables seamless integration with various services while maintaining control over API keys.
The Bring Your Own Key (BYOK) feature allows users to specify their API keys and select models for chat, completion, and embedding tasks across various AI platforms. This functionality ensures seamless integration with different services while providing users with control over their API keys.

## Configuration Steps

Select the **Bring Your Own Key** option and click the **Next** button.

![Refact BYOK Login page](../../assets/byok_login_start.png)

Click on **Edit BYOK file** to open the `bring-your-own-key.yaml` file:
Bring Your Own Key (BYOK) allows users to specify their API keys and select models for chat, completion, and embedding tasks across different AI platforms. This feature enables seamless integration with various services while maintaining control over API keys.

![Refact edit BYOK](../../assets/edit_byok.png)
## How to Switch Providers in the Plugin

In the file, specify your API key, model name, and endpoints as necessary.
By default, your provider is Refact.ai Cloud. To switch to a different provider, follow these steps:

![Bring your own key](../../assets/byok.png)
1. Click the "burger" menu button in the upper right corner of the plugin interface.
2. Open the "Configure providers" tab.<br>
<img src="../../assets/byok_1.png" alt="Configure providers tab" style="max-height:33vh; display:block; margin:1em 0; border:3px solid #e74c3c; border-radius:8px;">
3. Choose the provider you want to add from the list.<br>
<img src="../../assets/byok_2.png" alt="Choose provider" style="max-height:33vh; display:block; margin:1em 0; border:3px solid #e74c3c; border-radius:8px;">
4. You can enable or disable providers and delete them if needed.

## Additional Resources
For more examples and configurations, please visit the [Refact GitHub repository](https://github.com/smallcloudai/refact-lsp/tree/main/bring_your_own_key).
130 changes: 130 additions & 0 deletions docs/src/content/docs/guides/usage-based-pricing.md
@@ -0,0 +1,130 @@
---
title: Usage Based Pricing
sidebar_label: Usage Based Pricing
description: Learn about Refact.ai's new usage-based pricing model, how much you will be charged, and see a detailed per-model cost table.
---

Refact.ai uses a usage-based pricing system with coins. This page explains how coins work, how much you will be charged for different actions, and how to estimate your costs for each available model.

## How Coins Work

- **Coins are the unit of usage in Refact.ai.**
- **$1 = 1,000 coins.**
- You are only charged for the actual work performed by the AI Agent: simple tasks use fewer coins, complex ones use more.
- You choose the AI model for each task, and can stop tasks at any time to save coins.
- **Autocompletion is unlimited and free for all users.**

## Pricing Table (per 1M tokens)

<div id="pricing-toggle" style="margin-bottom: 1em;">
<button id="show-coins" style="margin-right: 0.5em;">Show in Coins</button>
<button id="show-dollars">Show in Dollars</button>
</div>

<table id="pricing-table">
<thead>
<tr>
<th>Model</th>
<th>Input Tokens</th>
<th>Output Tokens</th>
<th>Cache Read</th>
<th>Cache Write</th>
</tr>
</thead>
<tbody>
<tr>
<td>GPT-4o</td>
<td data-coins="2500" data-dollars="2.50">$2.50</td>
<td data-coins="10000" data-dollars="10.00">$10.00</td>
<td data-coins="1250" data-dollars="1.25">$1.25</td>
<td>-</td>
</tr>
<tr>
<td>GPT-4o-mini</td>
<td data-coins="150" data-dollars="0.15">$0.15</td>
<td data-coins="600" data-dollars="0.60">$0.60</td>
<td data-coins="75" data-dollars="0.075">$0.075</td>
<td>-</td>
</tr>
<tr>
<td>GPT-4.1</td>
<td data-coins="2000" data-dollars="2.00">$2.00</td>
<td data-coins="8000" data-dollars="8.00">$8.00</td>
<td data-coins="500" data-dollars="0.50">$0.50</td>
<td>-</td>
</tr>
<tr>
<td>Claude 3.7 Sonnet</td>
<td data-coins="3000" data-dollars="3.00">$3.00</td>
<td data-coins="15000" data-dollars="15.00">$15.00</td>
<td data-coins="300" data-dollars="0.30">$0.30</td>
<td data-coins="3750" data-dollars="3.75">$3.75</td>
</tr>
<tr>
<td>Claude 3.5 Sonnet</td>
<td data-coins="3000" data-dollars="3.00">$3.00</td>
<td data-coins="15000" data-dollars="15.00">$15.00</td>
<td data-coins="300" data-dollars="0.30">$0.30</td>
<td data-coins="3750" data-dollars="3.75">$3.75</td>
</tr>
<tr>
<td>o3-mini</td>
<td data-coins="1100" data-dollars="1.10">$1.10</td>
<td data-coins="4400" data-dollars="4.40">$4.40</td>
<td data-coins="550" data-dollars="0.55">$0.55</td>
<td>-</td>
</tr>
</tbody>
</table>

<script>
// Toggle the pricing table between coin and dollar display.
const showCoinsBtn = document.getElementById('show-coins');
const showDollarsBtn = document.getElementById('show-dollars');
const table = document.getElementById('pricing-table');
function setTable(mode) {
  for (const row of table.tBodies[0].rows) {
    for (const cell of row.cells) {
      if (cell.dataset.coins && cell.dataset.dollars) {
        // Use the stored dollar string directly so sub-cent prices
        // like $0.075 are not rounded to $0.08 by toFixed(2).
        cell.textContent = mode === 'coins'
          ? cell.dataset.coins + ' coins'
          : '$' + cell.dataset.dollars;
      }
    }
  }
}
showCoinsBtn.onclick = () => setTable('coins');
showDollarsBtn.onclick = () => setTable('dollars');
</script>

> **Note:** 1,000 coins = $1. For example, generating 10,000 output tokens with GPT-4o ($10.00 per 1M output tokens) would cost 100 coins ($0.10).
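
To make the arithmetic concrete, here is a quick illustrative sketch of the conversion (the rates come from the GPT-4o row of the table above; the function name is ours, not part of any Refact.ai API):

```python
# Illustrative only: convert per-request token counts into dollars and coins
# using the per-1M-token rates from the pricing table (GPT-4o shown here).
COINS_PER_DOLLAR = 1_000
GPT4O = {"input": 2.50, "output": 10.00, "cache_read": 1.25}  # $ per 1M tokens

def request_cost_dollars(input_tokens: int, output_tokens: int, cache_read_tokens: int = 0) -> float:
    return (input_tokens * GPT4O["input"]
            + output_tokens * GPT4O["output"]
            + cache_read_tokens * GPT4O["cache_read"]) / 1_000_000

cost = request_cost_dollars(input_tokens=20_000, output_tokens=10_000)
print(f"${cost:.2f} = {cost * COINS_PER_DOLLAR:.0f} coins")  # $0.15 = 150 coins
```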

## Plans and Coin Grants

| Plan | Monthly Coins | Details |
|----------------|--------------|---------|
| Free | 5,000 | Complimentary $5 (5,000-coin) starter grant to explore the full capabilities of Refact.ai Agent. |
| Pro | 10,000+ | $10/month = 10,000 coins. Pro users can multiply their monthly limit by 2×, 3×, 4×, or 5× (e.g., $20/month = 20,000 coins, $30/month = 30,000 coins). Unused coins roll over to the next month. One-time top-ups are available from $5. |
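
As a worked example of the multiplier and rollover rules (a minimal sketch under the assumptions in the table above; `monthly_balance` is an illustrative name, not a real API):

```python
# Pro grant scales linearly with the plan multiplier; unused coins roll over.
def monthly_balance(unused_coins: int, multiplier: int) -> int:
    base_grant = 10_000  # Pro: $10/month = 10,000 coins
    return unused_coins + base_grant * multiplier

print(monthly_balance(unused_coins=2_500, multiplier=2))  # 22500 coins available
```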

## What’s Included in Each Plan

| FREE | PRO |
|----------------|-----|
| $0 / month | $10 / month |
| 5,000 coins to use AI Agent & Chat | 10,000 coins renewed every month; unused coins roll over |
| In-IDE chat aware of your codebase context | Top up from $5 in your account ($1 = 1,000 coins) |
| Claude 3.7, GPT 4.1, 4o, Gemini 2.5 pro, and more | Subscribe to a 2x-5x Pro plan to top up automatically |
| Unlimited fast auto-completion | |
| Codebase-aware vector database (RAG) | |
| Self-hosting option available | |
| Discord support | |

## Bring Your Own Key (BYOK)

If you prefer to use your own API key (for OpenAI, Anthropic, or local models), you can connect it to Refact.ai. When using BYOK, requests are billed by your provider and do not consume Refact.ai coins.

**No commission:** For now, Refact.ai does not take any commission or markup on API usage. You pay only for the actual API cost of the model you use.

For more information on how to use Bring Your Own Key (BYOK), see the [BYOK documentation](https://github.com/smallcloudai/refact/blob/main/docs/byok.md) in the repository.

2 changes: 2 additions & 0 deletions docs/src/content/docs/supported-models.md
@@ -8,12 +8,14 @@ description: Supported Models in Refact.ai
With Refact.ai, access state-of-the-art models in your VS Code or JetBrains plugin and select the optimal LLM for each task.

### AI Agent models
- GPT 4.1 (default)
- Claude 3.7 Sonnet
- Claude 3.5 Sonnet
- GPT-4o
- o3-mini

### Chat models
- GPT 4.1 (default)
- Claude 3.7 Sonnet
- Claude 3.5 Sonnet
- GPT-4o
4 changes: 2 additions & 2 deletions refact-agent/engine/Cargo.toml
@@ -6,7 +6,7 @@ lto = true

[package]
name = "refact-lsp"
version = "0.10.14"
version = "0.10.17"
edition = "2021"
build = "build.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -19,6 +19,7 @@ vecdb = ["sqlite-vec"]
shadow-rs = "0.36.0"

[dependencies]
astral-tokio-tar = "0.5.2"
axum = { version = "0.6.20", features = ["default", "http2"] }
async-process = "2.0.1"
async-stream = "0.3.5"
Expand Down Expand Up @@ -77,7 +78,6 @@ tokenizers = "0.21.0"
tokio = { version = "1.43.0", features = ["fs", "io-std", "io-util", "macros", "rt-multi-thread", "signal", "process"] }
tokio-rusqlite = "0.5.0"
tokio-util = { version = "0.7.12", features = ["compat"] }
tokio-tar = "0.3.1"
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.4.0", features = ["cors"] }
tower-lsp = "0.20"
32 changes: 32 additions & 0 deletions refact-agent/engine/docker/lsp-release.Dockerfile
@@ -0,0 +1,32 @@
# This Dockerfile can be used to compile refact-lsp for development purposes,
# for example, to produce a refact-lsp binary that can be bind-mounted into Docker containers to start threads in containers

FROM lukemathwalker/cargo-chef:latest-rust-alpine3.21 AS chef

FROM chef AS planner
WORKDIR /refact-lsp
COPY . .
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder
WORKDIR /refact-lsp
COPY --from=planner /refact-lsp/recipe.json recipe.json

RUN apk add --no-cache \
build-base \
openssl-dev \
openssl-libs-static \
pkgconfig \
zlib-static

COPY ./docker/fix_sqlite_vec.h .

ENV CFLAGS="-include /refact-lsp/fix_sqlite_vec.h"

RUN cargo chef cook --release --recipe-path recipe.json

COPY . .

RUN cargo build --release

RUN mkdir -p /output && mv target/release/refact-lsp /output/
@@ -259,7 +259,7 @@ async def ask_using_http(
# meta["current_config_file"] = "/Users/user/.config/refact/integrations.d/postgres.yaml"
post_me["meta"] = meta
choices: List[Optional[Message]] = [None] * n_answers
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5000)) as session:
async with session.post(base_url + "/chat", json=post_me) as response:
if not stream:
text = await response.text()
@@ -306,15 +306,35 @@ async def ask_using_http(
# print(">>>", line_str)
if "choices" in j and len(j["choices"]) > 0:
if "usage" in j and j.get("usage") is not None:
have_usage = Usage(**j["usage"])
temp_usage = Usage(**j["usage"])
if have_usage is not None:
# merge usage by getting max for each field
have_usage = Usage(
prompt_tokens=max(have_usage.prompt_tokens, temp_usage.prompt_tokens),
completion_tokens=max(have_usage.completion_tokens, temp_usage.completion_tokens),
cache_creation_input_tokens=max(have_usage.cache_creation_input_tokens, temp_usage.cache_creation_input_tokens),
cache_read_input_tokens=max(have_usage.cache_read_input_tokens, temp_usage.cache_read_input_tokens),
)
else:
have_usage = temp_usage
deltas_collector.add_deltas(j["choices"])
elif "role" in j:
deterministic.append(Message(**j))
elif "subchat_id" in j:
map_key = j["tool_call_id"] + "__" + j["subchat_id"]
subchats[map_key].append(Message(**j["add_message"]))
elif j.get("usage") is not None:
have_usage = Usage(**j["usage"])
temp_usage = Usage(**j["usage"])
if have_usage is not None:
# merge usage by getting max for each field
have_usage = Usage(
prompt_tokens=max(have_usage.prompt_tokens, temp_usage.prompt_tokens),
completion_tokens=max(have_usage.completion_tokens, temp_usage.completion_tokens),
cache_creation_input_tokens=max(have_usage.cache_creation_input_tokens, temp_usage.cache_creation_input_tokens),
cache_read_input_tokens=max(have_usage.cache_read_input_tokens, temp_usage.cache_read_input_tokens),
)
else:
have_usage = temp_usage
else:
print("unrecognized streaming data (2):", j)
if callback is not None:
@@ -563,18 +583,21 @@ def _wrap_color(s: str, color: str = "red") -> str:
con(t)

elif m.role == "diff" and m.content is not None:
-    for chunk in json.loads(m.content):
-        message = f"{chunk['file_name']}:{chunk['line1']}-{chunk['line2']}"
-        message_str.append(message)
-        con(message)
-        if len(chunk["lines_add"]) > 0:
-            message = "\n".join([f"+{line}" for line in chunk['lines_add'].splitlines()])
-            message_str.append(message)
-            con(_wrap_color(message, "green"))
-        if len(chunk["lines_remove"]) > 0:
-            message = "\n".join([f"-{line}" for line in chunk['lines_remove'].splitlines()])
-            message_str.append(message)
-            con(_wrap_color(message, "red"))
+    try:
+        for chunk in json.loads(m.content):
+            message = f"{chunk['file_name']}:{chunk['line1']}-{chunk['line2']}"
+            message_str.append(message)
+            con(message)
+            if len(chunk["lines_add"]) > 0:
+                message = "\n".join([f"+{line}" for line in chunk['lines_add'].splitlines()])
+                message_str.append(message)
+                con(_wrap_color(message, "green"))
+            if len(chunk["lines_remove"]) > 0:
+                message = "\n".join([f"-{line}" for line in chunk['lines_remove'].splitlines()])
+                message_str.append(message)
+                con(_wrap_color(message, "red"))
+    except:
+        con(f"Error while diff rendering: {m.content}")

elif m.role in ["tool", "user", "assistant", "system", "cd_instruction"]:
if m.subchats is not None: # actually subchats can only appear in role="tool", but code is the same anyway
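The usage-merging hunks above take a field-wise maximum across the usage payloads that arrive while streaming, instead of letting the last payload win. A condensed sketch of the same idea (illustrative names, not the exact code from this file):

```python
from dataclasses import dataclass

@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cache_creation_input_tokens: int = 0
    cache_read_input_tokens: int = 0

def merge_usage(have, new):
    # Field-wise max: robust when several streamed chunks each carry usage.
    if have is None:
        return new
    return Usage(
        prompt_tokens=max(have.prompt_tokens, new.prompt_tokens),
        completion_tokens=max(have.completion_tokens, new.completion_tokens),
        cache_creation_input_tokens=max(have.cache_creation_input_tokens, new.cache_creation_input_tokens),
        cache_read_input_tokens=max(have.cache_read_input_tokens, new.cache_read_input_tokens),
    )

print(merge_usage(Usage(prompt_tokens=10), Usage(prompt_tokens=250, completion_tokens=40)))
```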
@@ -51,7 +51,7 @@ async def start(self):
t0 = time.time()
if self._verbose:
print("REFACT LSP start", program, " ".join(args))
-        self._refact_lsp_process = await asyncio.create_subprocess_exec(program, *args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, limit=1024*1024)
+        self._refact_lsp_process = await asyncio.create_subprocess_exec(program, *args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, limit=1024*1024*64)
ast_ok, vecdb_ok, post_listening, post_busy = False, False, False, False
while True:
while True:
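For context on the limit change above: asyncio's per-stream buffer defaults to 64 KiB, and a single output line longer than the buffer makes `readline()` raise, so the runner bumps the limit to 64 MiB. A minimal sketch of the pattern (the spawned command is a placeholder that just prints a long stderr line):

```python
import asyncio
import subprocess
import sys

async def read_long_line() -> int:
    proc = await asyncio.create_subprocess_exec(
        sys.executable, "-c",
        "import sys; sys.stderr.write('x' * 100_000 + chr(10))",
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        limit=1024 * 1024 * 64,  # default is 64 KiB; a 100k-char line would raise
    )
    line = await proc.stderr.readline()
    await proc.wait()
    return len(line)

print(asyncio.run(read_long_line()))  # 100001 -- would fail with the default limit
```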
@@ -2,10 +2,12 @@ use std::sync::Arc;
use serde::Deserialize;
use tokio::sync::{RwLock as ARwLock, Mutex as AMutex};

use crate::custom_error::MapErrToString;
use crate::global_context::GlobalContext;
use crate::at_commands::at_commands::AtCommandsContext;
use crate::subchat::subchat_single;
use crate::call_validation::{ChatContent, ChatMessage};
use crate::json_utils;

const PROMPT: &str = r#"
Your task is to do two things for a conversation between a user and an assistant:
@@ -117,6 +119,7 @@ pub async fn generate_follow_up_message(

tracing::info!("follow-up model says {:?}", response);

-    let response: FollowUpResponse = serde_json::from_str(&response).map_err(|e| e.to_string())?;
+    let response: FollowUpResponse = json_utils::extract_json_object(&response)
+        .map_err_with_prefix("Failed to parse json:")?;
Ok(response)
}
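The change above swaps strict `serde_json::from_str` for the more forgiving `json_utils::extract_json_object`, since models often wrap JSON in prose or code fences. A minimal Python sketch of the same idea (not the engine's actual implementation):

```python
import json
import re

def extract_json_object(text: str) -> dict:
    # Grab the outermost {...} span and parse that, ignoring surrounding prose.
    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
    if match is None:
        raise ValueError(f"no JSON object found in: {text!r}")
    return json.loads(match.group(0))

print(extract_json_object('Sure! Here it is: {"follow_ups": ["Add tests?"]}'))
```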
8 changes: 6 additions & 2 deletions refact-agent/engine/src/cached_tokenizers.rs
@@ -144,8 +144,12 @@ pub async fn cached_tokenizer(
let to = tokenizer_cache_dir.join(model_name.clone()).join("tokenizer.json");
let http_path = {
let caps_locked = caps.read().unwrap();
-        let rewritten_model_name = caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name);
-        caps_locked.tokenizer_path_template.replace("$MODEL", rewritten_model_name)
+        if caps_locked.tokenizer_path_template.is_empty() {
+            caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name).clone()
+        } else {
+            let rewritten_model_name = caps_locked.tokenizer_rewrite_path.get(&model_name).unwrap_or(&model_name);
+            caps_locked.tokenizer_path_template.replace("$MODEL", rewritten_model_name)
+        }
};
try_download_tokenizer_file_and_open(&client2, http_path.as_str(), api_key.clone(), &to).await?;
info!("loading tokenizer \"{}\"", to.display());