mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-15 00:05:53 +08:00
Synchronize with enterprise version (#4325)
### Type of change

- [x] Refactoring
This commit is contained in:
parent
564277736a
commit
50f209204e
@ -336,7 +336,7 @@
|
|||||||
"parameters": [],
|
"parameters": [],
|
||||||
"presencePenaltyEnabled": true,
|
"presencePenaltyEnabled": true,
|
||||||
"presence_penalty": 0.4,
|
"presence_penalty": 0.4,
|
||||||
"prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nReuirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowlegebase content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
|
"prompt": "Role: You are a customer support. \n\nTask: Please answer the question based on content of knowledge base. \n\nRequirements & restrictions:\n - DO NOT make things up when all knowledge base content is irrelevant to the question. \n - Answers need to consider chat history.\n - Request about customer's contact information like, Wechat number, LINE number, twitter, discord, etc,. , when knowledge base content can't answer his question. So, product expert could contact him soon to solve his problem.\n\n Knowledge base content is as following:\n {input}\n The above is the content of knowledge base.",
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
"temperatureEnabled": true,
|
"temperatureEnabled": true,
|
||||||
"topPEnabled": true,
|
"topPEnabled": true,
|
||||||
@ -603,7 +603,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"form": {
|
"form": {
|
||||||
"text": "Static messages.\nDefine replys after recieve user's contact information."
|
"text": "Static messages.\nDefine response after receive user's contact information."
|
||||||
},
|
},
|
||||||
"label": "Note",
|
"label": "Note",
|
||||||
"name": "N: What else?"
|
"name": "N: What else?"
|
||||||
@ -691,7 +691,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"form": {
|
"form": {
|
||||||
"text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deloy it?\n\nRefine it: How to deploy RAGFlow?"
|
"text": "Complete questions by conversation history.\nUser: What's RAGFlow?\nAssistant: RAGFlow is xxx.\nUser: How to deploy it?\n\nRefine it: How to deploy RAGFlow?"
|
||||||
},
|
},
|
||||||
"label": "Note",
|
"label": "Note",
|
||||||
"name": "N: Refine Question"
|
"name": "N: Refine Question"
|
||||||
|
@ -9,7 +9,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|||||||
lang="Chinese", callback=None, **kwargs):
|
lang="Chinese", callback=None, **kwargs):
|
||||||
parser_config = kwargs.get(
|
parser_config = kwargs.get(
|
||||||
"parser_config", {
|
"parser_config", {
|
||||||
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
|
"chunk_token_num": 512, "delimiter": "\n!?;。;!?", "layout_recognize": True})
|
||||||
eng = lang.lower() == "english"
|
eng = lang.lower() == "english"
|
||||||
|
|
||||||
parser_config["layout_recognize"] = True
|
parser_config["layout_recognize"] = True
|
||||||
@ -29,4 +29,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|||||||
doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
|
doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
|
||||||
chunks.extend(tokenize_chunks(sections, doc, eng))
|
chunks.extend(tokenize_chunks(sections, doc, eng))
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
@ -256,7 +256,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
|
res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
elif re.search(r"\.docx$", filename, re.IGNORECASE):
|
elif re.search(r"\.docx?$", filename, re.IGNORECASE):
|
||||||
docx_parser = Docx()
|
docx_parser = Docx()
|
||||||
ti_list, tbls = docx_parser(filename, binary,
|
ti_list, tbls = docx_parser(filename, binary,
|
||||||
from_page=0, to_page=10000, callback=callback)
|
from_page=0, to_page=10000, callback=callback)
|
||||||
@ -279,4 +279,4 @@ if __name__ == "__main__":
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
chunk(sys.argv[1], callback=dummy)
|
chunk(sys.argv[1], callback=dummy)
|
||||||
|
@ -24,7 +24,6 @@ import openai
|
|||||||
from ollama import Client
|
from ollama import Client
|
||||||
from rag.nlp import is_chinese, is_english
|
from rag.nlp import is_chinese, is_english
|
||||||
from rag.utils import num_tokens_from_string
|
from rag.utils import num_tokens_from_string
|
||||||
from groq import Groq
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
@ -840,6 +839,7 @@ class GeminiChat(Base):
|
|||||||
|
|
||||||
class GroqChat:
|
class GroqChat:
|
||||||
def __init__(self, key, model_name, base_url=''):
|
def __init__(self, key, model_name, base_url=''):
|
||||||
|
from groq import Groq
|
||||||
self.client = Groq(api_key=key)
|
self.client = Groq(api_key=key)
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
|
|
||||||
|
@ -299,8 +299,6 @@ class SparkTTS:
|
|||||||
yield audio_chunk
|
yield audio_chunk
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class XinferenceTTS:
|
class XinferenceTTS:
|
||||||
def __init__(self, key, model_name, **kwargs):
|
def __init__(self, key, model_name, **kwargs):
|
||||||
self.base_url = kwargs.get("base_url", None)
|
self.base_url = kwargs.get("base_url", None)
|
||||||
@ -330,3 +328,30 @@ class XinferenceTTS:
|
|||||||
for chunk in response.iter_content(chunk_size=1024):
|
for chunk in response.iter_content(chunk_size=1024):
|
||||||
if chunk:
|
if chunk:
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaTTS(Base):
|
||||||
|
def __init__(self, key, model_name="ollama-tts", base_url="https://api.ollama.ai/v1"):
|
||||||
|
if not base_url:
|
||||||
|
base_url = "https://api.ollama.ai/v1"
|
||||||
|
self.model_name = model_name
|
||||||
|
self.base_url = base_url
|
||||||
|
self.headers = {
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
}
|
||||||
|
|
||||||
|
def tts(self, text, voice="standard-voice"):
|
||||||
|
payload = {
|
||||||
|
"model": self.model_name,
|
||||||
|
"voice": voice,
|
||||||
|
"input": text
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post(f"{self.base_url}/audio/tts", headers=self.headers, json=payload, stream=True)
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
raise Exception(f"**Error**: {response.status_code}, {response.text}")
|
||||||
|
|
||||||
|
for chunk in response.iter_content():
|
||||||
|
if chunk:
|
||||||
|
yield chunk
|
||||||
|
@ -1,60 +1,60 @@
|
|||||||
#
|
#
|
||||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
# You may obtain a copy of the License at
|
# You may obtain a copy of the License at
|
||||||
#
|
#
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
#
|
#
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from api.db.db_models import close_connection
|
from api.db.db_models import close_connection
|
||||||
from api.db.services.task_service import TaskService
|
from api.db.services.task_service import TaskService
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.minio_conn import MINIOs
|
||||||
from rag.utils.redis_conn import REDIS_CONN
|
from rag.utils.redis_conn import REDIS_CONN
|
||||||
|
|
||||||
|
|
||||||
def collect():
|
def collect():
|
||||||
doc_locations = TaskService.get_ongoing_doc_name()
|
doc_locations = TaskService.get_ongoing_doc_name()
|
||||||
logging.debug(doc_locations)
|
logging.debug(doc_locations)
|
||||||
if len(doc_locations) == 0:
|
if len(doc_locations) == 0:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
return
|
return
|
||||||
return doc_locations
|
return doc_locations
|
||||||
|
|
||||||
def main():
|
|
||||||
locations = collect()
|
def main():
|
||||||
if not locations:
|
locations = collect()
|
||||||
return
|
if not locations:
|
||||||
logging.info(f"TASKS: {len(locations)}")
|
return
|
||||||
for kb_id, loc in locations:
|
logging.info(f"TASKS: {len(locations)}")
|
||||||
try:
|
for kb_id, loc in locations:
|
||||||
if REDIS_CONN.is_alive():
|
try:
|
||||||
try:
|
if REDIS_CONN.is_alive():
|
||||||
key = "{}/{}".format(kb_id, loc)
|
try:
|
||||||
if REDIS_CONN.exist(key):
|
key = "{}/{}".format(kb_id, loc)
|
||||||
continue
|
if REDIS_CONN.exist(key):
|
||||||
file_bin = STORAGE_IMPL.get(kb_id, loc)
|
continue
|
||||||
REDIS_CONN.transaction(key, file_bin, 12 * 60)
|
file_bin = MINIOs.get(kb_id, loc)
|
||||||
logging.info("CACHE: {}".format(loc))
|
REDIS_CONN.transaction(key, file_bin, 12 * 60)
|
||||||
except Exception as e:
|
logging.info("CACHE: {}".format(loc))
|
||||||
traceback.print_stack(e)
|
except Exception as e:
|
||||||
except Exception as e:
|
traceback.print_stack(e)
|
||||||
traceback.print_stack(e)
|
except Exception as e:
|
||||||
|
traceback.print_stack(e)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
while True:
|
while True:
|
||||||
main()
|
main()
|
||||||
close_connection()
|
close_connection()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
Loading…
x
Reference in New Issue
Block a user