diff --git a/src/engine/llm-guard.ts b/src/engine/llm-guard.ts
new file mode 100644
index 0000000..883b7aa
--- /dev/null
+++ b/src/engine/llm-guard.ts
@@ -0,0 +1,59 @@
+// Circuit breaker for graph-memory LLM calls: after a permanent (non-retryable)
+// API error, pause further calls for a cooldown instead of hammering the API.
+
+const RETRYABLE_STATUSES = new Set([429, 500, 502, 503, 529]);
+const PAUSING_STATUSES = new Set([400, 401, 403, 404, 422]);
+
+/**
+ * Extract the HTTP status code from an LLM error message such as
+ * "[graph-memory] LLM API 403: ..." or "[graph-memory] Anthropic API 404".
+ * Returns null when the error carries no recognizable status.
+ */
+export function extractLlmStatus(error: unknown): number | null {
+  const text = String(error ?? "");
+  const match = text.match(/\b(?:LLM|Anthropic) API (\d{3})\b/);
+  if (!match) return null;
+  return Number(match[1]);
+}
+
+/**
+ * Trips on permanent API failures (400/401/403/404/422) and pauses LLM calls
+ * for `cooldownMs`; retryable statuses (429/5xx/529) never trip it.
+ * `now` is injectable so tests can control the clock.
+ */
+export class LlmFailureGuard {
+  private pausedUntil = 0;
+
+  constructor(
+    private readonly cooldownMs = 10 * 60_000,
+    private readonly now = () => Date.now(),
+  ) {}
+
+  /** True when calls are currently allowed. */
+  canRun(): boolean {
+    return this.now() >= this.pausedUntil;
+  }
+
+  /** Milliseconds until the pause expires (0 when not paused). */
+  remainingMs(): number {
+    return Math.max(0, this.pausedUntil - this.now());
+  }
+
+  /** Clear the pause (called after a successful LLM round-trip). */
+  reset(): void {
+    this.pausedUntil = 0;
+  }
+
+  /**
+   * Inspect an error and pause future calls if it is a permanent API failure.
+   * Returns true when the guard tripped. Never shortens an existing pause.
+   */
+  tripIfNeeded(error: unknown): boolean {
+    const status = extractLlmStatus(error);
+    if (status == null || RETRYABLE_STATUSES.has(status) || !PAUSING_STATUSES.has(status)) {
+      return false;
+    }
+    this.pausedUntil = Math.max(this.pausedUntil, this.now() + this.cooldownMs);
+    return true;
+  }
+}
diff --git a/src/engine/llm.ts b/src/engine/llm.ts
index de7fd48..786d0a6 100755
--- a/src/engine/llm.ts
+++ b/src/engine/llm.ts
@@ -14,6 +14,8 @@
  * 内置:429/5xx 重试 3 次 + 30s 超时
  */
 
+import { LlmFailureGuard } from "./llm-guard.ts";
+
 export interface LlmConfig {
   apiKey?: string;
   baseURL?: string;
@@ -52,51 +54,70 @@ export function createCompleteFn(
   llmConfig?: LlmConfig,
   anthropicApiKey?: string,
 ): CompleteFn {
+  const guard = new LlmFailureGuard();
+
   return async (system, user) => {
-    // ── 路径 A(优先):pluginConfig.llm 直接调 OpenAI 兼容 API ──
-    if (llmConfig?.apiKey && llmConfig?.baseURL) {
-      const baseURL = llmConfig.baseURL.replace(/\/+$/, "");
-      const llmModel = llmConfig.model ?? model;
-      const res = await fetchRetry(`${baseURL}/chat/completions`, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          "Authorization": `Bearer ${llmConfig.apiKey}`,
-        },
-        body: JSON.stringify({
-          model: llmModel,
-          messages: [
-            ...(system.trim() ? [{ role: "system", content: system.trim() }] : []),
-            { role: "user", content: user },
-          ],
-          temperature: 0.1,
-        }),
-      });
-      if (!res.ok) {
-        const errText = await res.text().catch(() => "");
-        throw new Error(`[graph-memory] LLM API ${res.status}: ${errText.slice(0, 200)}`);
-      }
-      const data = await res.json() as any;
-      const text = data.choices?.[0]?.message?.content ?? "";
-      if (text) return text;
-      throw new Error("[graph-memory] LLM returned empty content");
-    }
-
-    // ── 路径 B:Anthropic API ──────────────────────────────
-    if (!anthropicApiKey) {
-      throw new Error(
-        "[graph-memory] No LLM available. 在 openclaw.json 的 graph-memory config 中配置 llm.apiKey + llm.baseURL",
-      );
-    }
-    const res = await fetchRetry("https://api.anthropic.com/v1/messages", {
-      method: "POST",
-      headers: { "Content-Type": "application/json", "x-api-key": anthropicApiKey, "anthropic-version": "2023-06-01" },
-      body: JSON.stringify({ model: llmConfig?.model ?? model, max_tokens: 4096, system, messages: [{ role: "user", content: user }] }),
-    });
-    if (!res.ok) throw new Error(`[graph-memory] Anthropic API ${res.status}`);
-    const data = await res.json() as any;
-    const text = data.content?.[0]?.text ?? "";
-    if (text) return text;
-    throw new Error("[graph-memory] Anthropic API returned empty content");
+    if (!guard.canRun()) {
+      const seconds = Math.max(1, Math.ceil(guard.remainingMs() / 1000));
+      throw new Error(
+        `[graph-memory] LLM paused for ${seconds}s after a previous permanent API error`,
+      );
+    }
+
+    try {
+      // ── 路径 A(优先):pluginConfig.llm 直接调 OpenAI 兼容 API ──
+      if (llmConfig?.apiKey && llmConfig?.baseURL) {
+        const baseURL = llmConfig.baseURL.replace(/\/+$/, "");
+        const llmModel = llmConfig.model ?? model;
+        const res = await fetchRetry(`${baseURL}/chat/completions`, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            "Authorization": `Bearer ${llmConfig.apiKey}`,
+          },
+          body: JSON.stringify({
+            model: llmModel,
+            messages: [
+              ...(system.trim() ? [{ role: "system", content: system.trim() }] : []),
+              { role: "user", content: user },
+            ],
+            temperature: 0.1,
+          }),
+        });
+        if (!res.ok) {
+          const errText = await res.text().catch(() => "");
+          throw new Error(`[graph-memory] LLM API ${res.status}: ${errText.slice(0, 200)}`);
+        }
+        const data = await res.json() as any;
+        const text = data.choices?.[0]?.message?.content ?? "";
+        if (!text) throw new Error("[graph-memory] LLM returned empty content");
+        guard.reset();
+        return text;
+      }
+
+      // ── 路径 B:Anthropic API ──────────────────────────────
+      if (!anthropicApiKey) {
+        throw new Error(
+          "[graph-memory] No LLM available. 在 openclaw.json 的 graph-memory config 中配置 llm.apiKey + llm.baseURL",
+        );
+      }
+      const res = await fetchRetry("https://api.anthropic.com/v1/messages", {
+        method: "POST",
+        headers: { "Content-Type": "application/json", "x-api-key": anthropicApiKey, "anthropic-version": "2023-06-01" },
+        body: JSON.stringify({ model: llmConfig?.model ?? model, max_tokens: 4096, system, messages: [{ role: "user", content: user }] }),
+      });
+      if (!res.ok) throw new Error(`[graph-memory] Anthropic API ${res.status}`);
+      const data = await res.json() as any;
+      const text = data.content?.[0]?.text ?? "";
+      if (!text) throw new Error("[graph-memory] Anthropic API returned empty content");
+      guard.reset();
+      return text;
+    } catch (error) {
+      if (guard.tripIfNeeded(error)) {
+        const seconds = Math.max(1, Math.ceil(guard.remainingMs() / 1000));
+        throw new Error(`${String(error)}; pausing graph-memory LLM calls for ${seconds}s`);
+      }
+      throw error;
+    }
   };
-}
\ No newline at end of file
+}
diff --git a/test/llm-guard.test.ts b/test/llm-guard.test.ts
new file mode 100644
index 0000000..ac1e86d
--- /dev/null
+++ b/test/llm-guard.test.ts
@@ -0,0 +1,36 @@
+import { describe, expect, it } from "vitest";
+
+import { LlmFailureGuard } from "../src/engine/llm-guard.ts";
+
+describe("LlmFailureGuard", () => {
+  it("pauses after permanent 4xx API errors", () => {
+    let now = 1_000;
+    const guard = new LlmFailureGuard(60_000, () => now);
+
+    expect(guard.canRun()).toBe(true);
+    expect(
+      guard.tripIfNeeded(new Error('[graph-memory] LLM API 403: {"error":"User not found or inactive"}')),
+    ).toBe(true);
+    expect(guard.canRun()).toBe(false);
+
+    now += 59_000;
+    expect(guard.canRun()).toBe(false);
+
+    now += 2_000;
+    expect(guard.canRun()).toBe(true);
+  });
+
+  it("ignores retryable errors", () => {
+    const guard = new LlmFailureGuard(60_000, () => 1_000);
+
+    expect(guard.tripIfNeeded(new Error("[graph-memory] LLM API 429: rate limited"))).toBe(false);
+    expect(guard.canRun()).toBe(true);
+  });
+
+  it("also trips on permanent Anthropic API errors", () => {
+    const guard = new LlmFailureGuard(60_000, () => 1_000);
+
+    expect(guard.tripIfNeeded(new Error("[graph-memory] Anthropic API 404"))).toBe(true);
+    expect(guard.canRun()).toBe(false);
+  });
+});