style: fix typo and format code (#2618)

### What problem does this PR solve?

- Fix typo
- Remove unused import
- Format code

### Type of change

- [x] Other (please describe): typo and format
This commit is contained in:
yqkcn 2024-09-27 13:17:25 +08:00 committed by GitHub
parent 4c0b79c4f6
commit 34abcf7704
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 12 additions and 16 deletions

View File

@@ -169,8 +169,8 @@ class TenantLLMService(CommonService):
num = 0 num = 0
try: try:
for u in cls.query(tenant_id = tenant_id, llm_name=mdlnm): for u in cls.query(tenant_id=tenant_id, llm_name=mdlnm):
num += cls.model.update(used_tokens = u.used_tokens + used_tokens)\ num += cls.model.update(used_tokens=u.used_tokens + used_tokens)\
.where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\ .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == mdlnm)\
.execute() .execute()
except Exception as e: except Exception as e:
@@ -252,7 +252,6 @@ class LLMBundle(object):
return return
yield chunk yield chunk
def chat(self, system, history, gen_conf): def chat(self, system, history, gen_conf):
txt, used_tokens = self.mdl.chat(system, history, gen_conf) txt, used_tokens = self.mdl.chat(system, history, gen_conf)
if not TenantLLMService.increase_usage( if not TenantLLMService.increase_usage(

View File

@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import re
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import json import json
from functools import reduce from functools import reduce
@@ -24,7 +23,7 @@ from api.db.services.llm_service import LLMBundle
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from graphrag.community_reports_extractor import CommunityReportsExtractor from graphrag.community_reports_extractor import CommunityReportsExtractor
from graphrag.entity_resolution import EntityResolution from graphrag.entity_resolution import EntityResolution
from graphrag.graph_extractor import GraphExtractor from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
from graphrag.mind_map_extractor import MindMapExtractor from graphrag.mind_map_extractor import MindMapExtractor
from rag.nlp import rag_tokenizer from rag.nlp import rag_tokenizer
from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string
@@ -52,7 +51,7 @@ def graph_merge(g1, g2):
return g return g
def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]): def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=DEFAULT_ENTITY_TYPES):
_, tenant = TenantService.get_by_id(tenant_id) _, tenant = TenantService.get_by_id(tenant_id)
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id) llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
ext = GraphExtractor(llm_bdl) ext = GraphExtractor(llm_bdl)

View File

@@ -1,6 +1,6 @@
import re import re
from graphrag.index import build_knowlege_graph_chunks from graphrag.index import build_knowledge_graph_chunks
from rag.app import naive from rag.app import naive
from rag.nlp import rag_tokenizer, tokenize_chunks from rag.nlp import rag_tokenizer, tokenize_chunks
@@ -15,9 +15,9 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
parser_config["layout_recognize"] = False parser_config["layout_recognize"] = False
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
parser_config=parser_config, callback=callback) parser_config=parser_config, callback=callback)
chunks = build_knowlege_graph_chunks(tenant_id, sections, callback, chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
parser_config.get("entity_types", ["organization", "person", "location", "event", "time"]) parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
) )
for c in chunks: c["docnm_kwd"] = filename for c in chunks: c["docnm_kwd"] = filename
doc = { doc = {

View File

@@ -20,7 +20,6 @@ from abc import ABC
from openai import OpenAI from openai import OpenAI
import openai import openai
from ollama import Client from ollama import Client
from volcengine.maas.v2 import MaasService
from rag.nlp import is_english from rag.nlp import is_english
from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string
from groq import Groq from groq import Groq
@@ -29,6 +28,7 @@ import json
import requests import requests
import asyncio import asyncio
class Base(ABC): class Base(ABC):
def __init__(self, key, model_name, base_url): def __init__(self, key, model_name, base_url):
self.client = OpenAI(api_key=key, base_url=base_url) self.client = OpenAI(api_key=key, base_url=base_url)

View File

@@ -78,11 +78,9 @@ encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
def num_tokens_from_string(string: str) -> int: def num_tokens_from_string(string: str) -> int:
"""Returns the number of tokens in a text string.""" """Returns the number of tokens in a text string."""
try: try:
num_tokens = len(encoder.encode(string)) return len(encoder.encode(string))
return num_tokens except Exception:
except Exception as e: return 0
pass
return 0
def truncate(string: str, max_len: int) -> str: def truncate(string: str, max_len: int) -> str: