diff --git a/README.md b/README.md
index 936d950..e23764b 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ A basic agent could be implemented as simple as a list of prompts for the subtas
 - [Using Built-in LLM_API](#Built-in-LLM-API)
 - [Using AgentKit without Programming Experience](#Using-AgentKit-without-Programming-Experience)
 - [Node Components](#Node-Components)
+- [Commonly Asked Questions](#Commonly-Asked-Questions)
 - [Citing AgnetKit](#Citing-AgentKit)
 
 # Installation
@@ -140,6 +141,12 @@ Inside each node (as shown to the left of the figure), AgentKit runs a built-in
 To support advanced capabilities such as branching, AgentKit offers API to dynamically modify the DAG at inference time (as shown to the right of the figure). Nodes/edges could be dynamically added or removed based on the LLM response at some ancestor nodes.
 
+# Commonly Asked Questions
+
+**Q:** I'm using the default `agentkit.llm_api`, and `graph.evaluate()` seems to be stuck.
+
+**A:** The LLM_API function catches and retries all API errors by default. Set `verbose=True` for each node to see which node you are stuck on, and `LLM_API_FUNCTION.debug=True` to see what error is causing the retries.
+
 # Citing AgentKit
 
 ```bibtex
 @article{wu2024agentkit,
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 32ee772..4d6c058 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -11,7 +11,7 @@
 author = 'Yue Wu'
 
 release = '0.1'
-version = '0.1.5'
+version = '0.1.6'
 
 # -- General configuration
 
diff --git a/setup.py b/setup.py
index bca788d..6453220 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 import pathlib
 
 PKG_NAME = "agentkit-llm"
-VERSION = "0.1.5"
+VERSION = "0.1.6"
 EXTRAS = {
     "logging": ["wandb"],
     "all": ["wandb", "openai", "anthropic", "tiktoken"],
diff --git a/src/agentkit/llm_api/GPT.py b/src/agentkit/llm_api/GPT.py
index 538f796..e8881e7 100644
--- a/src/agentkit/llm_api/GPT.py
+++ b/src/agentkit/llm_api/GPT.py
@@ -96,12 +96,15 @@ def query_chat(self, messages, shrink_idx, max_gen=512, temp=0.):
                     max_tokens=max_gen,
                 )
                 return completion.choices[0].message.content, {"prompt":completion.usage.prompt_tokens, "completion":completion.usage.completion_tokens, "total":completion.usage.total_tokens}
-            except (openai.RateLimitError, openai.APIStatusError, openai.APITimeoutError, openai.APIConnectionError, openai.InternalServerError):
-                time.sleep(30)
             except Exception as e:
-                e = str(e)
-                if "However, your messages resulted in" in e:
-                    print("error:", e)
+                if self.debug:
+                    raise e
+                elif isinstance(e, openai.RateLimitError) or isinstance(e, openai.APIStatusError) or isinstance(e, openai.APITimeoutError) or isinstance(e, openai.APIConnectionError) or isinstance(e, openai.InternalServerError):
+                    time.sleep(30)
+                    print(e)
+                elif "However, your messages resulted in" in str(e):
+                    print("error:", e, str(e))
+                    e = str(e)
                     index = e.find("your messages resulted in ")
                     import re
                     val = int(re.findall(r'\d+', e[index + len("your messages resulted in ") : ])[0])
@@ -109,5 +112,6 @@ def query_chat(self, messages, shrink_idx, max_gen=512, temp=0.):
                     model_max = int(re.findall(r'\d+', e[index2 + len("maximum context length is "):])[0])
                     messages = self.shrink_msg_by(messages, shrink_idx, val-model_max)
                 else:
+                    print("error:", e)
+                    print("retrying in 5 seconds")
                     time.sleep(5)
-                    print(e)
diff --git a/src/agentkit/llm_api/base.py b/src/agentkit/llm_api/base.py
index 4272169..f477456 100644
--- a/src/agentkit/llm_api/base.py
+++ b/src/agentkit/llm_api/base.py
@@ -10,6 +10,7 @@ def __init__(self, model_name, global_conter=None, model_type = "chat"):
         assert model_type in ["chat", "completion"], "type should be either 'chat' or 'completion'"
         self.type = model_type
         self.global_counter = global_conter
+        self.debug = False
 
     def query_chat(self, messages, shrink_idx, model, max_gen=1024, temp=0.):
         raise NotImplementedError
diff --git a/src/agentkit/llm_api/claude.py b/src/agentkit/llm_api/claude.py
index d307625..7f4cd59 100644
--- a/src/agentkit/llm_api/claude.py
+++ b/src/agentkit/llm_api/claude.py
@@ -89,14 +89,16 @@ def query_chat(self, messages, shrink_idx, max_gen=512, temp=0.):
                     max_tokens=max_gen,
                 )
                 return message.content[0].text, {"prompt":message.usage.input_tokens, "completion":message.usage.output_tokens, "total":message.usage.input_tokens+message.usage.output_tokens}
-            except (anthropic.APIConnectionError, anthropic.APIStatusError, anthropic.InternalServerError) as e:
-                time.sleep(30)
-            except anthropic.RateLimitError as e:
-                time.sleep(5*60)
             except Exception as e:
-                e = str(e)
-                if "However, your messages resulted in" in e:
-                    print("error:", e)
+                if self.debug:
+                    raise e
+                elif isinstance(e, anthropic.APIConnectionError) or isinstance(e, anthropic.APIStatusError) or isinstance(e, anthropic.InternalServerError):
+                    time.sleep(30)
+                elif isinstance(e, anthropic.RateLimitError):
+                    time.sleep(5*60)
+                elif "However, your messages resulted in" in str(e):
+                    print("error:", e, str(e))
+                    e = str(e)
                     index = e.find("your messages resulted in ")
                     import re
                     val = int(re.findall(r'\d+', e[index + len("your messages resulted in ") : ])[0])
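
For readers picking up this change from the new FAQ entry, below is a minimal sketch of the debugging workflow it describes. The node setup follows the README quick-start pattern, so `agentkit.llm_api.get_query`, `BaseNode`, and `compose_prompt.BaseComposePrompt` are names assumed from that example and may differ in your install; the two essentials this patch enables are the per-node `verbose=True` flag and the new `debug` attribute on the LLM_API object.

```python
# Minimal sketch (names per the README quick-start; adjust to your setup).
import agentkit
import agentkit.compose_prompt
import agentkit.llm_api
from agentkit import Graph, BaseNode

LLM_API_FUNCTION = agentkit.llm_api.get_query("gpt-4-turbo")
LLM_API_FUNCTION.debug = True  # new in 0.1.6: re-raise API errors instead of sleeping and retrying

graph = Graph()
subtask = "What are the pros and cons of using LLM agents for game AI?"
node = BaseNode(
    subtask,                                      # node key
    subtask,                                      # node prompt
    graph,
    LLM_API_FUNCTION,
    agentkit.compose_prompt.BaseComposePrompt(),
    verbose=True,                                 # print per-node progress to spot where evaluation stalls
)
graph.add_node(node)

answer = graph.evaluate()  # with debug=True, a failing API call now raises immediately instead of looping
```

Note that `debug` is an attribute rather than a constructor argument: the patch initializes `self.debug = False` in `BaseModel.__init__`, and both `GPT.py` and `claude.py` check it at the top of their `except` blocks, re-raising before any `time.sleep` retry path runs.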