Fix: float transfer exception. (#6197)
### What problem does this PR solve?

#6177

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
parent 222a2c8fa5
commit 1333d3c02a
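The diff below swaps bare `float()` casts for a defensive `get_float()` helper across the knowledge-graph search (`KGSearch`), the QA parser (`Pdf`), the retrieval `Dealer`, and the Elasticsearch connector (`ESConnection`). A minimal sketch of the failure mode being guarded against, assuming a score field that may be `None` or a non-numeric string (the `ent` value here is illustrative, not taken from the diff):

```python
# Illustration of the bug class this PR fixes: a bare float() cast raises
# when the value is None or otherwise not parseable as a number.
ent = {"_score": None}  # hypothetical search hit with a null score

try:
    sim = float(ent.get("_score", 0))
except TypeError as e:
    # float() argument must be a string or a real number, not 'NoneType'
    print(f"bare float() failed: {e}")

# The defensive helper introduced by this commit (added to rag.utils in the
# hunk further down) maps such values to -inf instead of raising, so
# comparisons against a similarity threshold still behave sensibly.
def get_float(v):
    if v is None:
        return float('-inf')
    try:
        return float(v)
    except Exception:
        return float('-inf')

print(get_float(None))    # -inf
print(get_float("0.83"))  # 0.83
print(get_float("oops"))  # -inf
```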
@@ -24,7 +24,7 @@ import trio
 from api.utils import get_uuid
 from graphrag.query_analyze_prompt import PROMPTS
 from graphrag.utils import get_entity_type2sampels, get_llm_cache, set_llm_cache, get_relation
-from rag.utils import num_tokens_from_string
+from rag.utils import num_tokens_from_string, get_float
 from rag.utils.doc_store_conn import OrderByExpr
 
 from rag.nlp.search import Dealer, index_name
@@ -72,13 +72,13 @@ class KGSearch(Dealer):
             for f in flds:
                 if f in ent and ent[f] is None:
                     del ent[f]
-            if float(ent.get("_score", 0)) < sim_thr:
+            if get_float(ent.get("_score", 0)) < sim_thr:
                 continue
             if isinstance(ent["entity_kwd"], list):
                 ent["entity_kwd"] = ent["entity_kwd"][0]
             res[ent["entity_kwd"]] = {
-                "sim": float(ent.get("_score", 0)),
-                "pagerank": float(ent.get("rank_flt", 0)),
+                "sim": get_float(ent.get("_score", 0)),
+                "pagerank": get_float(ent.get("rank_flt", 0)),
                 "n_hop_ents": json.loads(ent.get("n_hop_with_weight", "[]")),
                 "description": ent.get("content_with_weight", "{}")
             }
@@ -89,7 +89,7 @@ class KGSearch(Dealer):
         es_res = self.dataStore.getFields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
                                                    "weight_int"])
         for _, ent in es_res.items():
-            if float(ent["_score"]) < sim_thr:
+            if get_float(ent["_score"]) < sim_thr:
                 continue
             f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
             if isinstance(f, list):
@@ -97,8 +97,8 @@ class KGSearch(Dealer):
             if isinstance(t, list):
                 t = t[0]
             res[(f, t)] = {
-                "sim": float(ent["_score"]),
-                "pagerank": float(ent.get("weight_int", 0)),
+                "sim": get_float(ent["_score"]),
+                "pagerank": get_float(ent.get("weight_int", 0)),
                 "description": ent["content_with_weight"]
             }
         return res
@@ -30,6 +30,8 @@ from docx import Document
 from PIL import Image
 from markdown import markdown
 
+from rag.utils import get_float
+
 
 class Excel(ExcelParser):
     def __call__(self, fnm, binary=None, callback=None):
@@ -126,8 +128,8 @@ class Pdf(PdfParser):
             section, line_tag = box['text'], self._line_tag(box, zoomin)
             has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list)
             last_box, last_index, last_bull = box, index, has_bull
-            line_pn = float(line_tag.lstrip('@@').split('\t')[0])
-            line_top = float(line_tag.rstrip('##').split('\t')[3])
+            line_pn = get_float(line_tag.lstrip('@@').split('\t')[0])
+            line_top = get_float(line_tag.rstrip('##').split('\t')[3])
             tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index)
             if not has_bull:  # No question bullet
                 if not last_q:
@@ -18,7 +18,7 @@ import re
 from dataclasses import dataclass
 
 from rag.settings import TAG_FLD, PAGERANK_FLD
-from rag.utils import rmSpace
+from rag.utils import rmSpace, get_float
 from rag.nlp import rag_tokenizer, query
 import numpy as np
 from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
@@ -49,7 +49,7 @@ class Dealer:
         if len(shape) > 1:
             raise Exception(
                 f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
-        embedding_data = [float(v) for v in qv]
+        embedding_data = [get_float(v) for v in qv]
         vector_column_name = f"q_{len(embedding_data)}_vec"
         return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
 
@@ -153,7 +153,7 @@ class Dealer:
 
     @staticmethod
     def trans2floats(txt):
-        return [float(t) for t in txt.split("\t")]
+        return [get_float(t) for t in txt.split("\t")]
 
     def insert_citations(self, answer, chunks, chunk_v,
                          embd_mdl, tkweight=0.1, vtweight=0.9):
@@ -282,7 +282,7 @@ class Dealer:
         for chunk_id in sres.ids:
             vector = sres.field[chunk_id].get(vector_column, zero_vector)
             if isinstance(vector, str):
-                vector = [float(v) for v in vector.split("\t")]
+                vector = [get_float(v) for v in vector.split("\t")]
             ins_embd.append(vector)
         if not ins_embd:
             return [], [], []
@@ -19,6 +19,7 @@ import re
 import tiktoken
 from api.utils.file_utils import get_project_base_directory
 
+
 
 def singleton(cls, *args, **kw):
     instances = {}
@@ -89,3 +90,12 @@ def num_tokens_from_string(string: str) -> int:
 def truncate(string: str, max_len: int) -> str:
     """Returns truncated text if the length of text exceed max_len."""
     return encoder.decode(encoder.encode(string)[:max_len])
+
+
+def get_float(v: str | None):
+    if v is None:
+        return float('-inf')
+    try:
+        return float(v)
+    except Exception:
+        return float('-inf')
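`get_float` maps `None` and unparseable values to `float('-inf')`, so threshold comparisons stay well-defined instead of raising: a hit with a missing or malformed score simply fails the similarity check. A small illustrative example of how the callers changed in this diff use it (the score values below are made up):

```python
from rag.utils import get_float  # helper added in the hunk above

sim_thr = 0.3
ents = [{"_score": "0.92"}, {"_score": None}, {"_score": "n/a"}]

# Keep only hits whose score parses and clears the threshold.
kept = [e for e in ents if get_float(e.get("_score", 0)) >= sim_thr]
print(len(kept))  # 1 -- only the parseable score above the threshold survives
```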
@@ -26,7 +26,7 @@ from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
 from elastic_transport import ConnectionTimeout
 from rag import settings
 from rag.settings import TAG_FLD, PAGERANK_FLD
-from rag.utils import singleton
+from rag.utils import singleton, get_float
 from api.utils.file_utils import get_project_base_directory
 from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
     FusionExpr
@@ -178,7 +178,7 @@ class ESConnection(DocStoreConnection):
                                                                            MatchDenseExpr) and isinstance(
                 matchExprs[2], FusionExpr)
             weights = m.fusion_params["weights"]
-            vector_similarity_weight = float(weights.split(",")[1])
+            vector_similarity_weight = get_float(weights.split(",")[1])
         for m in matchExprs:
             if isinstance(m, MatchTextExpr):
                 minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
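In the connector change above, the fusion `weights` value arrives as a comma-separated string and the second element is taken as the dense-vector weight. A hedged sketch of that parse, using an assumed example value for `weights` (not taken from this diff):

```python
from rag.utils import get_float  # helper added by this commit

weights = "0.05,0.95"  # assumed example: "<keyword weight>,<vector weight>"
vector_similarity_weight = get_float(weights.split(",")[1])
print(vector_similarity_weight)  # 0.95
```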