From fe1805fa0ef6a00ff1a0f2f121ef65f1b831de2e Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Wed, 19 Jun 2024 13:05:32 +0800 Subject: [PATCH] add README to graph (#1211) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- graph/README.md | 45 ++++++++++++++++++ graph/README_zh.md | 46 +++++++++++++++++++ graph/canvas.py | 6 +-- graph/component/base.py | 4 +- graph/component/categorize.py | 4 +- graph/settings.py | 3 +- graph/test/client.py | 11 +++-- graph/test/dsl_examples/customer_service.json | 2 +- 8 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 graph/README.md create mode 100644 graph/README_zh.md diff --git a/graph/README.md b/graph/README.md new file mode 100644 index 000000000..71e654f03 --- /dev/null +++ b/graph/README.md @@ -0,0 +1,45 @@ +English | [简体中文](./README_zh.md) + +# *Graph* + + +## Introduction + +*Graph* is a mathematical concept composed of nodes and edges. +It is used to compose a complex workflow or agent. +This graph goes beyond a DAG: cycles can be used to describe an agent or workflow. +Under this folder, we provide a test tool, ./test/client.py, which can test DSL files such as the JSON files in the folder ./test/dsl_examples. +Please run this client from the same folder in which you start RAGFlow. If RAGFlow is run by Docker, please go into the container before running the client. +Otherwise, a correct configuration in conf/service_conf.yaml is essential. + +```bash +PYTHONPATH=path/to/ragflow python graph/test/client.py -h +usage: client.py [-h] -s DSL -t TENANT_ID [-m] + +options: + -h, --help show this help message and exit + -s DSL, --dsl DSL input dsl + -t TENANT_ID, --tenant_id TENANT_ID + Tenant ID + -m, --stream Stream output +``` +
+ +
+ + +## How to obtain a TENANT_ID from the command line? +
+ +
+💡 We plan to display it here in the near future. +
+ +
+ + +## How to set 'kb_ids' for component 'Retrieval' in DSL? +
+ +
+ diff --git a/graph/README_zh.md b/graph/README_zh.md new file mode 100644 index 000000000..003ff4b7a --- /dev/null +++ b/graph/README_zh.md @@ -0,0 +1,46 @@ +[English](./README.md) | 简体中文 + +# *Graph* + + +## 简介 + +"Graph"是一个由节点和边组成的数学概念。 +它被用来构建复杂的工作流或代理。 +这个图超越了有向无环图(DAG),我们可以使用循环来描述我们的代理或工作流。 +在这个文件夹下,我们提出了一个测试工具 ./test/client.py, +它可以测试像文件夹./test/dsl_examples下一样的DSL文件。 +请在启动 RAGFlow 的同一文件夹中使用此客户端。如果它是通过 Docker 运行的,请在运行客户端之前进入容器。 +否则,正确配置 conf/service_conf.yaml 文件是必不可少的。 + +```bash +PYTHONPATH=path/to/ragflow python graph/test/client.py -h +usage: client.py [-h] -s DSL -t TENANT_ID [-m] + +options: + -h, --help show this help message and exit + -s DSL, --dsl DSL input dsl + -t TENANT_ID, --tenant_id TENANT_ID + Tenant ID + -m, --stream Stream output +``` +
+ +
+ + +## 命令行中的TENANT_ID如何获得? +
+ +
+💡 后面会展示在这里: +
+ +
+ + +## DSL里面的Retrieval组件的kb_ids怎么填? +
+ +
+ diff --git a/graph/canvas.py b/graph/canvas.py index 08b842b76..c5b122fbc 100644 --- a/graph/canvas.py +++ b/graph/canvas.py @@ -24,7 +24,7 @@ import pandas as pd from graph.component import component_class from graph.component.base import ComponentBase -from graph.settings import flow_logger +from graph.settings import flow_logger, DEBUG class Canvas(ABC): @@ -170,14 +170,14 @@ class Canvas(ABC): if cpn.component_name == "Answer": self.answer.append(c) else: - print("RUN: ", c) + if DEBUG: print("RUN: ", c) ans = cpn.run(self.history, **kwargs) self.path[-1].append(c) ran += 1 prepare2run(self.components[self.path[-2][-1]]["downstream"]) while ran < len(self.path[-1]): - print(ran, self.path) + if DEBUG: print(ran, self.path) cpn_id = self.path[-1][ran] cpn = self.get_component(cpn_id) if not cpn["downstream"]: break diff --git a/graph/component/base.py b/graph/component/base.py index 904072d25..76d3e0766 100644 --- a/graph/component/base.py +++ b/graph/component/base.py @@ -24,7 +24,7 @@ from typing import List, Dict import pandas as pd from graph import settings -from graph.settings import flow_logger +from graph.settings import flow_logger, DEBUG _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params" _DEPRECATED_PARAMS = "_deprecated_params" @@ -428,7 +428,7 @@ class ComponentBase(ABC): reversed_cpnts.extend(self._canvas.path[-2]) reversed_cpnts.extend(self._canvas.path[-1]) - print(self.component_name, reversed_cpnts[::-1]) + if DEBUG: print(self.component_name, reversed_cpnts[::-1]) for u in reversed_cpnts[::-1]: if self.get_component_name(u) in ["switch"]: continue if self.component_name.lower().find("switch") < 0 \ diff --git a/graph/component/categorize.py b/graph/component/categorize.py index 2ab5bc5a5..8a7f90e70 100644 --- a/graph/component/categorize.py +++ b/graph/component/categorize.py @@ -20,6 +20,7 @@ import pandas as pd from api.db import LLMType from api.db.services.llm_service import LLMBundle from graph.component import GenerateParam, 
Generate +from graph.settings import DEBUG class CategorizeParam(GenerateParam): @@ -72,12 +73,11 @@ class Categorize(Generate, ABC): def _run(self, history, **kwargs): input = self.get_input() - print(input, "DDDDDDDDDDDDDDDDDDDDDDDDDDDDD") input = "Question: " + ("; ".join(input["content"]) if "content" in input else "") + "Category: " chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id) ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}], self._param.gen_conf()) - print(ans, ":::::::::::::::::::::::::::::::::") + if DEBUG: print(ans, ":::::::::::::::::::::::::::::::::", input) for c in self._param.category_description.keys(): if ans.lower().find(c.lower()) >= 0: return Categorize.be_output(self._param.category_description[c]["to"]) diff --git a/graph/settings.py b/graph/settings.py index e0cd1144e..f6fec1075 100644 --- a/graph/settings.py +++ b/graph/settings.py @@ -19,6 +19,7 @@ import os from api.utils.file_utils import get_project_base_directory from api.utils.log_utils import LoggerFactory, getLogger +DEBUG = 0 LoggerFactory.set_directory( os.path.join( get_project_base_directory(), @@ -30,4 +31,4 @@ LoggerFactory.LEVEL = 30 flow_logger = getLogger("flow") database_logger = getLogger("database") FLOAT_ZERO = 1e-8 -PARAM_MAXDEPTH = 5 \ No newline at end of file +PARAM_MAXDEPTH = 5 diff --git a/graph/test/client.py b/graph/test/client.py index 1f93382da..3682a5172 100644 --- a/graph/test/client.py +++ b/graph/test/client.py @@ -18,6 +18,7 @@ import os from functools import partial import readline from graph.canvas import Canvas +from graph.settings import DEBUG if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -28,21 +29,21 @@ if __name__ == '__main__': ) parser.add_argument('-s', '--dsl', default=dsl_default_path, help="input dsl", action='store', required=True) parser.add_argument('-t', '--tenant_id', default=False, help="Tenant ID", action='store', required=True) - 
parser.add_argument('-m', '--stream', default=False, help="Stream output", action='store_true', required=True) + parser.add_argument('-m', '--stream', default=False, help="Stream output", action='store_true', required=False) args = parser.parse_args() canvas = Canvas(open(args.dsl, "r").read(), args.tenant_id) while True: ans = canvas.run(stream=args.stream) - print("==================== Bot =====================\n> ") + print("==================== Bot =====================\n> ", end='') if args.stream and isinstance(ans, partial): cont = "" for an in ans(): - print(an["content"][len(cont):], end='') + print(an["content"][len(cont):], end='', flush=True) cont = an["content"] else: print(ans["content"]) - print(canvas.path) - question = input("==================== User =====================\n> ") + if DEBUG: print(canvas.path) + question = input("\n==================== User =====================\n> ") canvas.add_user_input(question) diff --git a/graph/test/dsl_examples/customer_service.json b/graph/test/dsl_examples/customer_service.json index bda69c636..11b8460b8 100644 --- a/graph/test/dsl_examples/customer_service.json +++ b/graph/test/dsl_examples/customer_service.json @@ -4,7 +4,7 @@ "obj":{ "component_name": "Begin", "params": { - "prologue": "Hi there!" + "prologue": "Hi! How can I help you?" } }, "downstream": ["answer:0"],