diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..09ee6e14e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM infiniflow/ragflow-base:v1.0
+
+WORKDIR /ragflow
+
+COPY . ./
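+# Build the web front end (this assumes Node.js/npm are provided by the ragflow-base image).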
+RUN cd ./web && npm i && npm run build
+
+ENV PYTHONPATH=/ragflow
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+COPY docker/entrypoint.sh ./
+RUN chmod +x ./entrypoint.sh
+
+ENTRYPOINT ["/bin/bash", "./entrypoint.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 21ab9664c..7f1e884c8 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,47 @@
-# docgpt
\ No newline at end of file
+English | [简体中文](./README_zh.md)
+
+
+## System Environment Preparation
+
+### Install docker
+
+If your machine doesn't have *Docker* installed, please refer to [Install Docker Engine](https://docs.docker.com/engine/install/)
+
+### OS Setups
+In order to run [ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html),
+you need to check the current value of **vm.max_map_count** with the following command:
+```bash
+121:/ragflow# sysctl vm.max_map_count
+vm.max_map_count = 262144
+```
+If **vm.max_map_count** is less than 262144, please run the following command to raise it:
+```bash
+121:/ragflow# sudo sysctl -w vm.max_map_count=262144
+```
+However, this change is not persistent and will be reset after a system reboot.
+To make the change permanent, you need to update the **/etc/sysctl.conf** file.
+Add or update the following line in the file:
+```bash
+vm.max_map_count=262144
+```
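+
+To apply the change from **/etc/sysctl.conf** without rebooting, you can typically reload it and check the value again, for example:
+```bash
+121:/ragflow# sudo sysctl -p
+121:/ragflow# sysctl vm.max_map_count
+vm.max_map_count = 262144
+```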
+
+### Here we go!
+> If you want to change the basic setup, such as ports or passwords, please update [.env](./docker/.env) before starting the system.
+
+> If you change anything in [.env](./docker/.env), please also check [service_conf.yaml](./conf/service_conf.yaml), which is the
+> configuration of the back-end service and should stay consistent with [.env](./docker/.env).
+
+> - In [service_conf.yaml](./conf/service_conf.yaml), configuring the *LLM* in **user_default_llm** is strongly recommended.
+>   There you need to specify the LLM factory and your own _API_KEY_ (see the sketch below this list).
+>   It's OK if you don't have an _API_KEY_ at the moment; you can set it later on the settings page after starting and logging into the system.
+> - We currently support the following LLM factories, with more coming soon:
+> [OpenAI](https://platform.openai.com/login?launch), [通义千问/QWen](https://dashscope.console.aliyun.com/model),
+> [智普AI/ZhipuAI](https://open.bigmodel.cn/)
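+
+As a rough sketch, the **user_default_llm** section in [service_conf.yaml](./conf/service_conf.yaml) looks like the following (the API key shown is only a placeholder, not a real key):
+```bash
+user_default_llm:
+  factory: '通义千问'
+  api_key: 'sk-xxxxxxxxxxxxx'
+```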
+```bash
+121:/ragflow# cd docker
+121:/ragflow/docker# docker compose up
+```
+If, after a few minutes, the output stops scrolling and looks like the screenshot below, _**Hallelujah!**_ You have successfully launched the system.
+
+

+
\ No newline at end of file
diff --git a/README_zh.md b/README_zh.md
new file mode 100644
index 000000000..c43cc5665
--- /dev/null
+++ b/README_zh.md
@@ -0,0 +1 @@
+[English](./README.md) | 简体中文
\ No newline at end of file
diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 9ce7e2b11..4a75922a7 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -146,6 +146,21 @@ def list():
return server_error_response(e)
+@manager.route('/thumbnails', methods=['GET'])
+@login_required
+def thumbnails():
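+ # Returns {"<doc_id>": "<thumbnail>", ...} for the comma-separated "doc_ids" query parameter.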
+ doc_ids = [i for i in request.args.get("doc_ids", "").split(",") if i]
+ if not doc_ids:
+ return get_json_result(
+ data=False, retmsg='Lack of "Document ID"', retcode=RetCode.ARGUMENT_ERROR)
+
+ try:
+ docs = DocumentService.get_thumbnails(doc_ids)
+ return get_json_result(data={d["id"]: d["thumbnail"] for d in docs})
+ except Exception as e:
+ return server_error_response(e)
+
+
@manager.route('/change_status', methods=['POST'])
@login_required
@validate_request("doc_id", "status")
@@ -263,6 +278,7 @@ def rename():
@manager.route('/get/', methods=['GET'])
+#@login_required
def get(doc_id):
try:
e, doc = DocumentService.get_by_id(doc_id)
diff --git a/api/db/init_data.py b/api/db/init_data.py
index 531abf42b..efa2d8a2a 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -56,21 +56,21 @@ def init_superuser():
"api_key": API_KEY})
if not UserService.save(**user_info):
- print("【ERROR】can't init admin.")
+ print("\033[93m【ERROR】\033[0mcan't init admin.")
return
TenantService.insert(**tenant)
UserTenantService.insert(**usr_tenant)
TenantLLMService.insert_many(tenant_llm)
- print("【INFO】Super user initialized. user name: admin, password: admin. Changing the password after logining is strongly recomanded.")
+ print("【INFO】Super user initialized. \033[93muser name: admin, password: admin\033[0m. Changing the password after logining is strongly recomanded.")
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
msg = chat_mdl.chat(system="", history=[{"role": "user", "content": "Hello!"}], gen_conf={})
if msg.find("ERROR: ") == 0:
- print("【ERROR】: '{}' dosen't work. {}".format(tenant["llm_id"]), msg)
+ print("\33[91m【ERROR】\33[0m: ", "'{}' dosen't work. {}".format(tenant["llm_id"]), msg)
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
- v,c = embd_mdl.encode(["Hello!"])
+ v, c = embd_mdl.encode(["Hello!"])
if c == 0:
- print("【ERROR】: '{}' dosen't work...".format(tenant["embd_id"]))
+ print("\33[91m【ERROR】\33[0m:", " '{}' dosen't work!".format(tenant["embd_id"]))
def init_llm_factory():
@@ -89,12 +89,13 @@ def init_llm_factory():
"logo": "",
"tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
"status": "1",
- },{
- "name": "文心一言",
- "logo": "",
- "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
- "status": "1",
},
+ # {
+ # "name": "文心一言",
+ # "logo": "",
+ # "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
+ # "status": "1",
+ # },
]
llm_infos = [
# ---------------------- OpenAI ------------------------
@@ -198,7 +199,7 @@ def init_llm_factory():
"llm_name": "embedding-2",
"tags": "TEXT EMBEDDING",
"max_tokens": 512,
- "model_type": LLMType.SPEECH2TEXT.value
+ "model_type": LLMType.EMBEDDING.value
},
]
for info in factory_infos:
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index d4d00c1f6..f58b0e1b8 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -107,4 +107,11 @@ class DocumentService(CommonService):
docs = cls.model.select(Knowledgebase.tenant_id).join(Knowledgebase, on=(Knowledgebase.id == cls.model.kb_id)).where(cls.model.id == doc_id, Knowledgebase.status==StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:return
- return docs[0]["tenant_id"]
\ No newline at end of file
+ return docs[0]["tenant_id"]
+
+
+ @classmethod
+ @DB.connection_context()
+ def get_thumbnails(cls, docids):
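+ # Fetch only the id and thumbnail fields for the given document ids.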
+ fields = [cls.model.id, cls.model.thumbnail]
+ return list(cls.model.select(*fields).where(cls.model.id.in_(docids)).dicts())
diff --git a/api/ragflow_server.py b/api/ragflow_server.py
index f322b4e92..44b489695 100644
--- a/api/ragflow_server.py
+++ b/api/ragflow_server.py
@@ -33,6 +33,15 @@ from api.db.init_data import init_web_data
from api.versions import get_versions
if __name__ == '__main__':
+ print("""
+ ____ ______ __
+ / __ \ ____ _ ____ _ / ____// /____ _ __
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
+/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
+ /____/
+
+ """)
stat_logger.info(
f'project base: {utils.file_utils.get_project_base_directory()}'
)
diff --git a/api/settings.py b/api/settings.py
index 08f7dc74e..331a0866b 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -45,12 +45,36 @@ REQUEST_MAX_WAIT_SEC = 300
USE_REGISTRY = get_base_config("use_registry")
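+# Default model names for each supported LLM factory.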
+default_llm = {
+ "通义千问": {
+ "chat_model": "qwen-plus",
+ "embedding_model": "text-embedding-v2",
+ "image2text_model": "qwen-vl-max",
+ "asr_model": "paraformer-realtime-8k-v1",
+ },
+ "OpenAI": {
+ "chat_model": "gpt-3.5-turbo",
+ "embedding_model": "text-embedding-ada-002",
+ "image2text_model": "gpt-4-vision-preview",
+ "asr_model": "whisper-1",
+ },
+ "智普AI": {
+ "chat_model": "glm-3-turbo",
+ "embedding_model": "embedding-2",
+ "image2text_model": "glm-4v",
+ "asr_model": "",
+ },
+}
LLM = get_base_config("user_default_llm", {})
-LLM_FACTORY=LLM.get("factory", "通义千问")
-CHAT_MDL = LLM.get("chat_model", "qwen-plus")
-EMBEDDING_MDL = LLM.get("embedding_model", "text-embedding-v2")
-ASR_MDL = LLM.get("asr_model", "paraformer-realtime-8k-v1")
-IMAGE2TEXT_MDL = LLM.get("image2text_model", "qwen-vl-max")
+LLM_FACTORY = LLM.get("factory", "通义千问")
+if LLM_FACTORY not in default_llm:
+ print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
+ LLM_FACTORY = "通义千问"
+CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
+EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
+ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
+IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
+
API_KEY = LLM.get("api_key", "infiniflow API Key")
PARSERS = LLM.get("parsers", "general:General,qa:Q&A,resume:Resume,naive:Naive,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
@@ -72,7 +96,7 @@ RANDOM_INSTANCE_ID = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("random_inst
PROXY = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("proxy")
PROXY_PROTOCOL = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("protocol")
-DATABASE = decrypt_database_config()
+DATABASE = decrypt_database_config(name="mysql")
# Logger
LoggerFactory.set_directory(os.path.join(get_project_base_directory(), "logs", "api"))
diff --git a/api/utils/__init__.py b/api/utils/__init__.py
index 49ebc5b06..9ae6e0cb7 100644
--- a/api/utils/__init__.py
+++ b/api/utils/__init__.py
@@ -264,7 +264,7 @@ def decrypt_database_password(password):
return pwdecrypt_fun(private_key, password)
-def decrypt_database_config(database=None, passwd_key="passwd", name="database"):
+def decrypt_database_config(database=None, passwd_key="password", name="database"):
if not database:
database = get_base_config(name, {})
diff --git a/conf/service_conf.yaml b/conf/service_conf.yaml
index 34b357c0c..5bf7b782a 100644
--- a/conf/service_conf.yaml
+++ b/conf/service_conf.yaml
@@ -1,3 +1,28 @@
+ragflow:
+ host: 0.0.0.0
+ http_port: 9380
+mysql:
+ name: 'rag_flow'
+ user: 'root'
+ password: 'infini_rag_flow'
+ host: '127.0.0.1'
+ port: 5455
+ max_connections: 100
+ stale_timeout: 30
+minio:
+ user: 'rag_flow'
+ password: 'infini_rag_flow'
+ host: '127.0.0.1:9000'
+es:
+ hosts: 'http://127.0.0.1:1200'
+user_default_llm:
+ factory: '通义千问'
+ api_key: 'sk-xxxxxxxxxxxxx'
+oauth:
+ github:
+ client_id: 302129228f0d96055bee
+ secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
+ url: https://github.com/login/oauth/access_token
authentication:
client:
switch: false
@@ -8,34 +33,4 @@ authentication:
permission:
switch: false
component: false
- dataset: false
-ragflow:
- # you must set real ip address, 127.0.0.1 and 0.0.0.0 is not supported
- host: 0.0.0.0
- http_port: 9380
-database:
- name: 'rag_flow'
- user: 'root'
- passwd: 'infini_rag_flow'
- host: '127.0.0.1'
- port: 5455
- max_connections: 100
- stale_timeout: 30
-minio:
- user: 'rag_flow'
- passwd: 'infini_rag_flow'
- host: '127.0.0.1:9000'
-es:
- hosts: 'http://127.0.0.1:9200'
-user_default_llm:
- factory: '通义千问'
- chat_model: 'qwen-plus'
- embedding_model: 'text-embedding-v2'
- asr_model: 'paraformer-realtime-8k-v1'
- image2text_model: 'qwen-vl-max'
- api_key: 'sk-xxxxxxxxxxxxx'
-oauth:
- github:
- client_id: 302129228f0d96055bee
- secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
- url: https://github.com/login/oauth/access_token
\ No newline at end of file
+ dataset: false
\ No newline at end of file
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index b01d215aa..1cd7f0313 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -84,6 +84,24 @@ services:
restart: always
+ ragflow:
+ depends_on:
+ - es01
+ - mysql
+ - minio
+ image: infiniflow/ragflow:v1.0
+ container_name: ragflow-server
+ ports:
+ - ${SVR_HTTP_PORT}:9380
+ volumes:
+ - ./service_conf.yaml:/ragflow/conf/service_conf.yaml
+ - ./nginx.conf:/etc/nginx/nginx.conf
+ - ./ragflow-logs:/ragflow/logs
+ networks:
+ - ragflow
+ restart: always
+
+
volumes:
esdata01:
driver: local
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 000000000..99661f14e
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+python rag/svr/task_broker.py &
+
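+# Keep the task executor alive: relaunch it (2 MPI workers) whenever it exits.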
+function task_exe(){
+ while [ 1 -eq 1 ];do mpirun -n 2 python rag/svr/task_executor.py ; done
+}
+
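+# Relaunch task_broker.py if it is no longer running; checked every 5 seconds.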
+function watch_broker(){
+ while [ 1 -eq 1 ];do
+ C=`ps aux|grep "task_broker.py"|grep -v grep|wc -l`;
+ if [ $C -lt 1 ];then
+ python rag/svr/task_broker.py &
+ fi
+ sleep 5;
+ done
+}
+
+
+task_exe &
+sleep 10;
+watch_broker &
+
+python api/ragflow_server.py
\ No newline at end of file
diff --git a/docker/service_conf.yaml b/docker/service_conf.yaml
new file mode 100644
index 000000000..5bf7b782a
--- /dev/null
+++ b/docker/service_conf.yaml
@@ -0,0 +1,36 @@
+ragflow:
+ host: 0.0.0.0
+ http_port: 9380
+mysql:
+ name: 'rag_flow'
+ user: 'root'
+ password: 'infini_rag_flow'
+ host: '127.0.0.1'
+ port: 5455
+ max_connections: 100
+ stale_timeout: 30
+minio:
+ user: 'rag_flow'
+ password: 'infini_rag_flow'
+ host: '127.0.0.1:9000'
+es:
+ hosts: 'http://127.0.0.1:1200'
+user_default_llm:
+ factory: '通义千问'
+ api_key: 'sk-xxxxxxxxxxxxx'
+oauth:
+ github:
+ client_id: 302129228f0d96055bee
+ secret_key: e518e55ccfcdfcae8996afc40f110e9c95f14fc4
+ url: https://github.com/login/oauth/access_token
+authentication:
+ client:
+ switch: false
+ http_app_key:
+ http_secret_key:
+ site:
+ switch: false
+permission:
+ switch: false
+ component: false
+ dataset: false
\ No newline at end of file
diff --git a/rag/utils/minio_conn.py b/rag/utils/minio_conn.py
index 14a7067b9..18d3d3b6d 100644
--- a/rag/utils/minio_conn.py
+++ b/rag/utils/minio_conn.py
@@ -23,7 +23,7 @@ class HuMinio(object):
try:
self.conn = Minio(settings.MINIO["host"],
access_key=settings.MINIO["user"],
- secret_key=settings.MINIO["passwd"],
+ secret_key=settings.MINIO["password"],
secure=False
)
except Exception as e: