mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-02 04:30:38 +08:00
Feat: add token consumption & speed to little lamp. (#6077)
### What problem does this PR solve?

#6059

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
c85b468b8d
commit
42eb99554f
@ -521,7 +521,7 @@ class ComponentBase(ABC):
|
|||||||
if u.lower().find("answer") >= 0:
|
if u.lower().find("answer") >= 0:
|
||||||
for r, c in self._canvas.history[::-1]:
|
for r, c in self._canvas.history[::-1]:
|
||||||
if r == "user":
|
if r == "user":
|
||||||
upstream_outs.append(pd.DataFrame([{"content": c, "component_id": u}]))
|
upstream_outs.append(pd.DataFrame([{"content": f"USER:{c}", "component_id": u}]))
|
||||||
break
|
break
|
||||||
break
|
break
|
||||||
if self.component_name.lower().find("answer") >= 0 and self.get_component_name(u) in ["relevant"]:
|
if self.component_name.lower().find("answer") >= 0 and self.get_component_name(u) in ["relevant"]:
|
||||||
|
@ -304,8 +304,25 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|||||||
retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
|
retrieval_time_cost = (retrieval_ts - generate_keyword_ts) * 1000
|
||||||
generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000
|
generate_result_time_cost = (finish_chat_ts - retrieval_ts) * 1000
|
||||||
|
|
||||||
|
tk_num = num_tokens_from_string(think+answer)
|
||||||
prompt += "\n\n### Query:\n%s" % " ".join(questions)
|
prompt += "\n\n### Query:\n%s" % " ".join(questions)
|
||||||
prompt = f"{prompt}\n\n - Total: {total_time_cost:.1f}ms\n - Check LLM: {check_llm_time_cost:.1f}ms\n - Create retriever: {create_retriever_time_cost:.1f}ms\n - Bind embedding: {bind_embedding_time_cost:.1f}ms\n - Bind LLM: {bind_llm_time_cost:.1f}ms\n - Tune question: {refine_question_time_cost:.1f}ms\n - Bind reranker: {bind_reranker_time_cost:.1f}ms\n - Generate keyword: {generate_keyword_time_cost:.1f}ms\n - Retrieval: {retrieval_time_cost:.1f}ms\n - Generate answer: {generate_result_time_cost:.1f}ms"
|
prompt = (
|
||||||
|
f"{prompt}\n\n"
|
||||||
|
"## Time elapsed:\n"
|
||||||
|
f" - Total: {total_time_cost:.1f}ms\n"
|
||||||
|
f" - Check LLM: {check_llm_time_cost:.1f}ms\n"
|
||||||
|
f" - Create retriever: {create_retriever_time_cost:.1f}ms\n"
|
||||||
|
f" - Bind embedding: {bind_embedding_time_cost:.1f}ms\n"
|
||||||
|
f" - Bind LLM: {bind_llm_time_cost:.1f}ms\n"
|
||||||
|
f" - Tune question: {refine_question_time_cost:.1f}ms\n"
|
||||||
|
f" - Bind reranker: {bind_reranker_time_cost:.1f}ms\n"
|
||||||
|
f" - Generate keyword: {generate_keyword_time_cost:.1f}ms\n"
|
||||||
|
f" - Retrieval: {retrieval_time_cost:.1f}ms\n"
|
||||||
|
f" - Generate answer: {generate_result_time_cost:.1f}ms\n\n"
|
||||||
|
"## Token usage:\n"
|
||||||
|
f" - Generated tokens(approximately): {tk_num}\n"
|
||||||
|
f" - Token speed: {int(tk_num/(generate_result_time_cost/1000.))}/s"
|
||||||
|
)
|
||||||
return {"answer": think+answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()}
|
return {"answer": think+answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()}
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user