diff --git a/agent/component/iterationitem.py b/agent/component/iterationitem.py
index 71d032b51..cb1a27049 100644
--- a/agent/component/iterationitem.py
+++ b/agent/component/iterationitem.py
@@ -38,6 +38,10 @@ class IterationItem(ComponentBase, ABC):
         ans = parent.get_input()
         ans = parent._param.delimiter.join(ans["content"]) if "content" in ans else ""
         ans = [a.strip() for a in ans.split(parent._param.delimiter)]
+        if not ans:
+            self._idx = -1
+            return pd.DataFrame()
+
         df = pd.DataFrame([{"content": ans[self._idx]}])
         self._idx += 1
         if self._idx >= len(ans):
diff --git a/agentic_reasoning/prompts.py b/agentic_reasoning/prompts.py
index 610409af1..715896b86 100644
--- a/agentic_reasoning/prompts.py
+++ b/agentic_reasoning/prompts.py
@@ -68,6 +68,7 @@ REASON_PROMPT = (
     f"- You have a dataset to search, so you just provide a proper search query.\n"
     f"- Use {BEGIN_SEARCH_QUERY} to request a dataset search and end with {END_SEARCH_QUERY}.\n"
     "- The language of query MUST be as the same as 'Question' or 'search result'.\n"
+    "- If no helpful information can be found, rewrite the search query with fewer, more precise keywords.\n"
     "- When done searching, continue your reasoning.\n\n"
     'Please answer the following question. You should think step by step to solve it.\n\n'
)
diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index f6e2d8f2b..e7d71186c 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -30,7 +30,8 @@ from api import settings
 from rag.app.resume import forbidden_select_fields4resume
 from rag.app.tag import label_question
 from rag.nlp.search import index_name
-from rag.prompts import kb_prompt, message_fit_in, llm_id2llm_type, keyword_extraction, full_question, chunks_format
+from rag.prompts import kb_prompt, message_fit_in, llm_id2llm_type, keyword_extraction, full_question, chunks_format, \
+    citation_prompt
 from rag.utils import rmSpace, num_tokens_from_string
 from rag.utils.tavily_conn import Tavily
@@ -235,9 +236,12 @@ def chat(dialog, messages, stream=True, **kwargs):
         gen_conf = dialog.llm_setting

     msg = [{"role": "system", "content": prompt_config["system"].format(**kwargs)}]
+    prompt4citation = ""
+    if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
+        prompt4citation = citation_prompt()
     msg.extend([{"role": m["role"], "content": re.sub(r"##\d+\$\$", "", m["content"])}
                 for m in messages if m["role"] != "system"])
-    used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.97))
+    used_token_count, msg = message_fit_in(msg, int(max_tokens * 0.95))
     assert len(msg) >= 2, f"message_fit_in has bug: {msg}"
     prompt = msg[0]["content"]
@@ -256,14 +260,23 @@ def chat(dialog, messages, stream=True, **kwargs):
             think = ans[0] + "</think>"
             answer = ans[1]
         if knowledges and (prompt_config.get("quote", True) and kwargs.get("quote", True)):
-            answer, idx = retriever.insert_citations(answer,
-                                                     [ck["content_ltks"]
-                                                      for ck in kbinfos["chunks"]],
-                                                     [ck["vector"]
-                                                      for ck in kbinfos["chunks"]],
-                                                     embd_mdl,
-                                                     tkweight=1 - dialog.vector_similarity_weight,
-                                                     vtweight=dialog.vector_similarity_weight)
+            answer = re.sub(r"##[ij]\$\$", "", answer, flags=re.DOTALL)
+            if not re.search(r"##[0-9]+\$\$", answer):
+                answer, idx = retriever.insert_citations(answer,
+                                                         [ck["content_ltks"]
+                                                          for ck in kbinfos["chunks"]],
+                                                         [ck["vector"]
+                                                          for ck in kbinfos["chunks"]],
+                                                         embd_mdl,
+                                                         tkweight=1 - dialog.vector_similarity_weight,
+                                                         vtweight=dialog.vector_similarity_weight)
+            else:
+                idx = set([])
+                for r in re.finditer(r"##([0-9]+)\$\$", answer):
re.finditer(r"##([0-9]+)\$\$", answer): + i = int(r.group(1)) + if i < len(kbinfos["chunks"]): + idx.add(i) + idx = set([kbinfos["chunks"][int(i)]["doc_id"] for i in idx]) recall_docs = [ d for d in kbinfos["doc_aggs"] if d["doc_id"] in idx] @@ -298,7 +311,7 @@ def chat(dialog, messages, stream=True, **kwargs): if stream: last_ans = "" answer = "" - for ans in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf): + for ans in chat_mdl.chat_streamly(prompt+prompt4citation, msg[1:], gen_conf): if thought: ans = re.sub(r".*", "", ans, flags=re.DOTALL) answer = ans @@ -312,7 +325,7 @@ def chat(dialog, messages, stream=True, **kwargs): yield {"answer": thought+answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)} yield decorate_answer(thought+answer) else: - answer = chat_mdl.chat(prompt, msg[1:], gen_conf) + answer = chat_mdl.chat(prompt+prompt4citation, msg[1:], gen_conf) user_content = msg[-1].get("content", "[content not available]") logging.debug("User: {}|Assistant: {}".format(user_content, answer)) res = decorate_answer(answer) diff --git a/rag/prompts.py b/rag/prompts.py index 32ef5325e..af6df1623 100644 --- a/rag/prompts.py +++ b/rag/prompts.py @@ -108,22 +108,63 @@ def kb_prompt(kbinfos, max_tokens): docs = {d.id: d.meta_fields for d in docs} doc2chunks = defaultdict(lambda: {"chunks": [], "meta": []}) - for ck in kbinfos["chunks"][:chunks_num]: - doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + ck["content_with_weight"]) + for i, ck in enumerate(kbinfos["chunks"][:chunks_num]): + doc2chunks[ck["docnm_kwd"]]["chunks"].append((f"URL: {ck['url']}\n" if "url" in ck else "") + f"ID: {i}\n" + ck["content_with_weight"]) doc2chunks[ck["docnm_kwd"]]["meta"] = docs.get(ck["doc_id"], {}) knowledges = [] for nm, cks_meta in doc2chunks.items(): - txt = f"Document: {nm} \n" + txt = f"\nDocument: {nm} \n" for k, v in cks_meta["meta"].items(): txt += f"{k}: {v}\n" txt += "Relevant fragments as following:\n" for i, chunk in enumerate(cks_meta["chunks"], 1): - txt += f"{i}. {chunk}\n" + txt += f"{chunk}\n" knowledges.append(txt) return knowledges +def citation_prompt(): + return """ + +# Citation requirements: +- Inserts CITATIONS in format '##i$$ ##j$$' where i,j are the ID of the content you are citing and encapsulated with '##' and '$$'. +- Inserts the CITATION symbols at the end of a sentence, AND NO MORE than 4 citations. +- DO NOT insert CITATION in the answer if the content is not from retrieved chunks. + +--- Example START --- +: Here is the knowledge base: + +Document: Elon Musk Breaks Silence on Crypto, Warns Against Dogecoin ... +URL: https://blockworks.co/news/elon-musk-crypto-dogecoin +ID: 0 +The Tesla co-founder advised against going all-in on dogecoin, but Elon Musk said it’s still his favorite crypto... + +Document: Elon Musk's Dogecoin tweet sparks social media frenzy +ID: 1 +Musk said he is 'willing to serve' D.O.G.E. – shorthand for Dogecoin. + +Document: Causal effect of Elon Musk tweets on Dogecoin price +ID: 2 +If you think of Dogecoin — the cryptocurrency based on a meme — you can’t help but also think of Elon Musk... + +Document: Elon Musk's Tweet Ignites Dogecoin's Future In Public Services +ID: 3 +The market is heating up after Elon Musk's announcement about Dogecoin. Is this a new era for crypto?... + + The above is the knowledge base. + +: What's the Elon's view on dogecoin? + +: Musk has consistently expressed his fondness for Dogecoin, often citing its humor and the inclusion of dogs in its branding. 
+Recently, Musk has hinted at potential future roles for Dogecoin. His tweets have sparked speculation about Dogecoin's potential integration into public services ##3$$.
+Overall, while Musk enjoys Dogecoin and often promotes it, he also warns against over-investing in it, reflecting both his personal amusement and caution regarding its speculative nature.
+
+--- Example END ---
+
+"""
+
+
 def keyword_extraction(chat_mdl, content, topn=3):
     prompt = f"""
 Role: You're a text analyzer.
diff --git a/rag/utils/tavily_conn.py b/rag/utils/tavily_conn.py
index 7d78636e0..c8eaf4ae9 100644
--- a/rag/utils/tavily_conn.py
+++ b/rag/utils/tavily_conn.py
@@ -27,7 +27,8 @@ class Tavily:
         try:
             response = self.tavily_client.search(
                 query=query,
-                search_depth="advanced"
+                search_depth="advanced",
+                max_results=6
             )
             return [{"url": res["url"], "title": res["title"], "content": res["content"], "score": res["score"]} for res in response["results"]]
         except Exception as e:
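
Note on the citation flow this patch introduces: the LLM now emits '##i$$' markers itself, where i is the global chunk ID that kb_prompt injects as an 'ID: {i}' header, and the old embedding-based insert_citations is kept only as a fallback when no markers appear in the answer. A minimal, self-contained sketch of the marker-parsing step follows; the function name parse_llm_citations and the toy chunks/doc_aggs data are illustrative assumptions, not part of the patch, though the regexes and the out-of-range guard mirror the dialog_service.py hunk above.

import re


def parse_llm_citations(answer, chunks, doc_aggs):
    # Strip any literal '##i$$'/'##j$$' placeholders the model may have
    # copied verbatim from the citation prompt.
    answer = re.sub(r"##[ij]\$\$", "", answer, flags=re.DOTALL)

    # Collect the chunk IDs the model actually cited, ignoring
    # out-of-range (hallucinated) IDs.
    chunk_idx = set()
    for m in re.finditer(r"##([0-9]+)\$\$", answer):
        i = int(m.group(1))
        if i < len(chunks):
            chunk_idx.add(i)

    # Map cited chunks back to the documents they came from.
    doc_ids = {chunks[i]["doc_id"] for i in chunk_idx}
    recall_docs = [d for d in doc_aggs if d["doc_id"] in doc_ids]
    return answer, chunk_idx, recall_docs


chunks = [{"doc_id": "d1"}, {"doc_id": "d2"}]
doc_aggs = [{"doc_id": "d1", "doc_name": "musk.pdf"},
            {"doc_id": "d2", "doc_name": "doge.pdf"}]
ans, idx, docs = parse_llm_citations(
    "Dogecoin is his favorite crypto ##0$$ ##7$$.", chunks, doc_aggs)
assert idx == {0} and docs == [{"doc_id": "d1", "doc_name": "musk.pdf"}]
# The hallucinated ##7$$ is dropped because it exceeds len(chunks).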
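
For that parsing to be sound, the IDs the model sees must match positions in kbinfos["chunks"]; the kb_prompt hunk guarantees this by enumerating the global chunk list before grouping chunks by document. A toy illustration of that numbering rule under the same assumption about the chunk structure (hypothetical data):

from collections import defaultdict

# Hypothetical retrieval result standing in for kbinfos["chunks"].
chunks = [
    {"docnm_kwd": "musk.pdf", "content_with_weight": "Dogecoin is his favorite..."},
    {"docnm_kwd": "doge.pdf", "content_with_weight": "Musk tweeted about D.O.G.E..."},
]

doc2chunks = defaultdict(list)
for i, ck in enumerate(chunks):  # i is the global chunk ID the model cites
    doc2chunks[ck["docnm_kwd"]].append(f"ID: {i}\n" + ck["content_with_weight"])

for nm, cks in doc2chunks.items():
    print(f"\nDocument: {nm}\nRelevant fragments as following:")
    print("\n".join(cks))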