mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-14 05:36:01 +08:00
style: fix typo and format code (#2618)
### What problem does this PR solve?

- Fix typo
- Remove unused import
- Format code

### Type of change

- [x] Other (please describe): typo and format
This commit is contained in:
parent
4c0b79c4f6
commit
34abcf7704
@ -252,7 +252,6 @@ class LLMBundle(object):
|
|||||||
return
|
return
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
def chat(self, system, history, gen_conf):
|
def chat(self, system, history, gen_conf):
|
||||||
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
||||||
if not TenantLLMService.increase_usage(
|
if not TenantLLMService.increase_usage(
|
||||||
|
@ -13,7 +13,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import re
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import json
|
import json
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
@ -24,7 +23,7 @@ from api.db.services.llm_service import LLMBundle
|
|||||||
from api.db.services.user_service import TenantService
|
from api.db.services.user_service import TenantService
|
||||||
from graphrag.community_reports_extractor import CommunityReportsExtractor
|
from graphrag.community_reports_extractor import CommunityReportsExtractor
|
||||||
from graphrag.entity_resolution import EntityResolution
|
from graphrag.entity_resolution import EntityResolution
|
||||||
from graphrag.graph_extractor import GraphExtractor
|
from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
|
||||||
from graphrag.mind_map_extractor import MindMapExtractor
|
from graphrag.mind_map_extractor import MindMapExtractor
|
||||||
from rag.nlp import rag_tokenizer
|
from rag.nlp import rag_tokenizer
|
||||||
from rag.utils import num_tokens_from_string
|
from rag.utils import num_tokens_from_string
|
||||||
@ -52,7 +51,7 @@ def graph_merge(g1, g2):
|
|||||||
return g
|
return g
|
||||||
|
|
||||||
|
|
||||||
def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]):
|
def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
|
||||||
_, tenant = TenantService.get_by_id(tenant_id)
|
_, tenant = TenantService.get_by_id(tenant_id)
|
||||||
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
|
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
|
||||||
ext = GraphExtractor(llm_bdl)
|
ext = GraphExtractor(llm_bdl)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from graphrag.index import build_knowlege_graph_chunks
|
from graphrag.index import build_knowledge_graph_chunks
|
||||||
from rag.app import naive
|
from rag.app import naive
|
||||||
from rag.nlp import rag_tokenizer, tokenize_chunks
|
from rag.nlp import rag_tokenizer, tokenize_chunks
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|||||||
parser_config["layout_recognize"] = False
|
parser_config["layout_recognize"] = False
|
||||||
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
||||||
parser_config=parser_config, callback=callback)
|
parser_config=parser_config, callback=callback)
|
||||||
chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
|
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|
||||||
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
|
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
|
||||||
)
|
)
|
||||||
for c in chunks: c["docnm_kwd"] = filename
|
for c in chunks: c["docnm_kwd"] = filename
|
||||||
|
@ -20,7 +20,6 @@ from abc import ABC
|
|||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
import openai
|
import openai
|
||||||
from ollama import Client
|
from ollama import Client
|
||||||
from volcengine.maas.v2 import MaasService
|
|
||||||
from rag.nlp import is_english
|
from rag.nlp import is_english
|
||||||
from rag.utils import num_tokens_from_string
|
from rag.utils import num_tokens_from_string
|
||||||
from groq import Groq
|
from groq import Groq
|
||||||
@ -29,6 +28,7 @@ import json
|
|||||||
import requests
|
import requests
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
class Base(ABC):
|
class Base(ABC):
|
||||||
def __init__(self, key, model_name, base_url):
|
def __init__(self, key, model_name, base_url):
|
||||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||||
|
@ -78,10 +78,8 @@ encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
|||||||
def num_tokens_from_string(string: str) -> int:
|
def num_tokens_from_string(string: str) -> int:
|
||||||
"""Returns the number of tokens in a text string."""
|
"""Returns the number of tokens in a text string."""
|
||||||
try:
|
try:
|
||||||
num_tokens = len(encoder.encode(string))
|
return len(encoder.encode(string))
|
||||||
return num_tokens
|
except Exception:
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user