Synchronize with enterprise version (#4325)

### Type of change

- [x] Refactoring
Yingfeng 2025-01-02 13:44:44 +08:00 committed by GitHub
parent 564277736a
commit 50f209204e
6 changed files with 94 additions and 69 deletions

View File

@@ -336,7 +336,7 @@
       "parameters": [],
       "presencePenaltyEnabled": true,
       "presence_penalty": 0.4,
-      "prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nReuirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowlegebase content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
+      "prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nRequirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowledge base content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
       "temperature": 0.1,
       "temperatureEnabled": true,
       "topPEnabled": true,
@@ -603,7 +603,7 @@
     {
       "data": {
        "form": {
-        "text": "Static messages.\nDefine replys after recieve user's contact information."
+        "text": "Static messages.\nDefine response after receive user's contact information."
        },
        "label": "Note",
        "name": "N: What else?"
@@ -691,7 +691,7 @@
     {
       "data": {
        "form": {
-        "text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deloy it?\n\nRefine it: How to deploy RAGFlow?"
+        "text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deploy it?\n\nRefine it: How to deploy RAGFlow?"
        },
        "label": "Note",
        "name": "N: Refine Question"

View File

@@ -9,7 +9,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
           lang="Chinese", callback=None, **kwargs):
     parser_config = kwargs.get(
         "parser_config", {
-            "chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
+            "chunk_token_num": 512, "delimiter": "\n!?;。;!?", "layout_recognize": True})
     eng = lang.lower() == "english"
     parser_config["layout_recognize"] = True
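The delimiter string mixes ASCII and full-width CJK sentence punctuation; the synchronized version adds an ASCII semicolon to the set. A minimal sketch of splitting text on this character class, as an illustration only and not the actual merge logic in rag.nlp (the real chunker also packs pieces up to chunk_token_num tokens):

```python
import re

# Illustration: which characters act as sentence boundaries after the change.
delimiter = "\n!?;。;!?"
text = "First sentence; second sentence。Third one!Fourth?"

pieces = [p.strip() for p in re.split(r"[%s]+" % re.escape(delimiter), text) if p.strip()]
print(pieces)  # ['First sentence', 'second sentence', 'Third one', 'Fourth']
```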

View File

@@ -256,7 +256,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
         return res
-    elif re.search(r"\.docx$", filename, re.IGNORECASE):
+    elif re.search(r"\.docx?$", filename, re.IGNORECASE):
         docx_parser = Docx()
         ti_list, tbls = docx_parser(filename, binary,
                                     from_page=0, to_page=10000, callback=callback)
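The regex change widens this branch from `.docx` only to both `.doc` and `.docx`, since the `?` makes the final `x` optional. A quick standalone check of the pattern:

```python
import re

# The trailing "x?" makes the branch accept legacy .doc names as well as .docx.
pattern = r"\.docx?$"
for name in ["report.docx", "legacy.DOC", "notes.pdf"]:
    print(name, bool(re.search(pattern, name, re.IGNORECASE)))
# report.docx True
# legacy.DOC True
# notes.pdf False
```

Whether the downstream Docx parser can actually read the legacy binary .doc format is a separate question; the regex change only routes such filenames into this branch.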

View File

@@ -24,7 +24,6 @@ import openai
 from ollama import Client
 from rag.nlp import is_chinese, is_english
 from rag.utils import num_tokens_from_string
-from groq import Groq
 import os
 import json
 import requests
@@ -840,6 +839,7 @@ class GeminiChat(Base):
 class GroqChat:
     def __init__(self, key, model_name, base_url=''):
+        from groq import Groq
         self.client = Groq(api_key=key)
         self.model_name = model_name
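Moving `from groq import Groq` from module level into `GroqChat.__init__` makes the package needed only when a Groq model is actually instantiated, so the rest of chat_model.py still imports on deployments without it. A hedged sketch of the same deferred-import pattern, with a hypothetical class name and error message that are not part of the RAGFlow code:

```python
class LazyGroqClient:
    """Hypothetical sketch: defer an optional dependency until first use."""

    def __init__(self, api_key: str):
        try:
            # Importing this module alone does not require groq to be installed;
            # the dependency is pulled in only when the class is constructed.
            from groq import Groq
        except ImportError as e:
            raise ImportError("Install the 'groq' package to use Groq models.") from e
        self.client = Groq(api_key=api_key)
```

Deferring the import keeps groq an optional dependency for installations that never use GroqChat.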

View File

@@ -299,8 +299,6 @@ class SparkTTS:
             yield audio_chunk

 class XinferenceTTS:
     def __init__(self, key, model_name, **kwargs):
         self.base_url = kwargs.get("base_url", None)
@@ -330,3 +328,30 @@ class XinferenceTTS:
         for chunk in response.iter_content(chunk_size=1024):
             if chunk:
                 yield chunk
+
+
+class OllamaTTS(Base):
+    def __init__(self, key, model_name="ollama-tts", base_url="https://api.ollama.ai/v1"):
+        if not base_url:
+            base_url = "https://api.ollama.ai/v1"
+        self.model_name = model_name
+        self.base_url = base_url
+        self.headers = {
+            "Content-Type": "application/json"
+        }
+
+    def tts(self, text, voice="standard-voice"):
+        payload = {
+            "model": self.model_name,
+            "voice": voice,
+            "input": text
+        }
+
+        response = requests.post(f"{self.base_url}/audio/tts", headers=self.headers, json=payload, stream=True)
+
+        if response.status_code != 200:
+            raise Exception(f"**Error**: {response.status_code}, {response.text}")
+
+        for chunk in response.iter_content():
+            if chunk:
+                yield chunk
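Because `OllamaTTS.tts` is a generator that yields raw audio bytes, a caller would normally stream the chunks straight to a file or playback buffer. A hedged usage sketch: the endpoint, default voice, and base_url come from the class above, the output filename is made up, and the audio container format depends entirely on the server:

```python
# Hypothetical usage of the OllamaTTS class added above.
tts = OllamaTTS(key=None, model_name="ollama-tts")

with open("hello.audio", "wb") as f:
    for chunk in tts.tts("Hello from RAGFlow!", voice="standard-voice"):
        f.write(chunk)
```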

View File

@@ -19,7 +19,7 @@ import traceback
 from api.db.db_models import close_connection
 from api.db.services.task_service import TaskService
-from rag.utils.storage_factory import STORAGE_IMPL
+from rag.utils.minio_conn import MINIOs
 from rag.utils.redis_conn import REDIS_CONN
@@ -31,6 +31,7 @@ def collect():
         return
     return doc_locations

 def main():
     locations = collect()
     if not locations:
@@ -43,7 +44,7 @@ def main():
             key = "{}/{}".format(kb_id, loc)
             if REDIS_CONN.exist(key):
                 continue
-            file_bin = STORAGE_IMPL.get(kb_id, loc)
+            file_bin = MINIOs.get(kb_id, loc)
             REDIS_CONN.transaction(key, file_bin, 12 * 60)
             logging.info("CACHE: {}".format(loc))
         except Exception as e:
@@ -52,7 +53,6 @@ def main():
             traceback.print_stack(e)

 if __name__ == "__main__":
     while True:
         main()
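The swap from STORAGE_IMPL to MINIOs only changes where the blob is fetched from; the warm-cache loop itself is unchanged: skip keys already present in Redis, otherwise fetch the file body and store it with a TTL. A generic sketch of that check-then-cache pattern with plain redis-py, as an illustration only, not RAGFlow's REDIS_CONN wrapper, and without asserting the TTL unit used by the real transaction call:

```python
import redis

r = redis.Redis()  # hypothetical direct client; the service above uses REDIS_CONN


def warm_cache(kb_id: str, loc: str, fetch_blob) -> None:
    """Cache a document blob under kb_id/loc unless it is already present."""
    key = f"{kb_id}/{loc}"
    if r.exists(key):
        return
    blob = fetch_blob(kb_id, loc)   # e.g. MINIOs.get(kb_id, loc) in the service above
    r.setex(key, 12 * 60, blob)     # store with a 12-minute TTL (seconds) in this sketch
```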