mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 08:29:00 +08:00
Fix: float transfer exception. (#6197)
### What problem does this PR solve?

#6177

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
222a2c8fa5
commit
1333d3c02a
@ -24,7 +24,7 @@ import trio
|
|||||||
from api.utils import get_uuid
|
from api.utils import get_uuid
|
||||||
from graphrag.query_analyze_prompt import PROMPTS
|
from graphrag.query_analyze_prompt import PROMPTS
|
||||||
from graphrag.utils import get_entity_type2sampels, get_llm_cache, set_llm_cache, get_relation
|
from graphrag.utils import get_entity_type2sampels, get_llm_cache, set_llm_cache, get_relation
|
||||||
from rag.utils import num_tokens_from_string
|
from rag.utils import num_tokens_from_string, get_float
|
||||||
from rag.utils.doc_store_conn import OrderByExpr
|
from rag.utils.doc_store_conn import OrderByExpr
|
||||||
|
|
||||||
from rag.nlp.search import Dealer, index_name
|
from rag.nlp.search import Dealer, index_name
|
||||||
@ -72,13 +72,13 @@ class KGSearch(Dealer):
|
|||||||
for f in flds:
|
for f in flds:
|
||||||
if f in ent and ent[f] is None:
|
if f in ent and ent[f] is None:
|
||||||
del ent[f]
|
del ent[f]
|
||||||
if float(ent.get("_score", 0)) < sim_thr:
|
if get_float(ent.get("_score", 0)) < sim_thr:
|
||||||
continue
|
continue
|
||||||
if isinstance(ent["entity_kwd"], list):
|
if isinstance(ent["entity_kwd"], list):
|
||||||
ent["entity_kwd"] = ent["entity_kwd"][0]
|
ent["entity_kwd"] = ent["entity_kwd"][0]
|
||||||
res[ent["entity_kwd"]] = {
|
res[ent["entity_kwd"]] = {
|
||||||
"sim": float(ent.get("_score", 0)),
|
"sim": get_float(ent.get("_score", 0)),
|
||||||
"pagerank": float(ent.get("rank_flt", 0)),
|
"pagerank": get_float(ent.get("rank_flt", 0)),
|
||||||
"n_hop_ents": json.loads(ent.get("n_hop_with_weight", "[]")),
|
"n_hop_ents": json.loads(ent.get("n_hop_with_weight", "[]")),
|
||||||
"description": ent.get("content_with_weight", "{}")
|
"description": ent.get("content_with_weight", "{}")
|
||||||
}
|
}
|
||||||
@ -89,7 +89,7 @@ class KGSearch(Dealer):
|
|||||||
es_res = self.dataStore.getFields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
|
es_res = self.dataStore.getFields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
|
||||||
"weight_int"])
|
"weight_int"])
|
||||||
for _, ent in es_res.items():
|
for _, ent in es_res.items():
|
||||||
if float(ent["_score"]) < sim_thr:
|
if get_float(ent["_score"]) < sim_thr:
|
||||||
continue
|
continue
|
||||||
f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
|
f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
|
||||||
if isinstance(f, list):
|
if isinstance(f, list):
|
||||||
@ -97,8 +97,8 @@ class KGSearch(Dealer):
|
|||||||
if isinstance(t, list):
|
if isinstance(t, list):
|
||||||
t = t[0]
|
t = t[0]
|
||||||
res[(f, t)] = {
|
res[(f, t)] = {
|
||||||
"sim": float(ent["_score"]),
|
"sim": get_float(ent["_score"]),
|
||||||
"pagerank": float(ent.get("weight_int", 0)),
|
"pagerank": get_float(ent.get("weight_int", 0)),
|
||||||
"description": ent["content_with_weight"]
|
"description": ent["content_with_weight"]
|
||||||
}
|
}
|
||||||
return res
|
return res
|
||||||
|
@ -30,6 +30,8 @@ from docx import Document
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from markdown import markdown
|
from markdown import markdown
|
||||||
|
|
||||||
|
from rag.utils import get_float
|
||||||
|
|
||||||
|
|
||||||
class Excel(ExcelParser):
|
class Excel(ExcelParser):
|
||||||
def __call__(self, fnm, binary=None, callback=None):
|
def __call__(self, fnm, binary=None, callback=None):
|
||||||
@ -126,8 +128,8 @@ class Pdf(PdfParser):
|
|||||||
section, line_tag = box['text'], self._line_tag(box, zoomin)
|
section, line_tag = box['text'], self._line_tag(box, zoomin)
|
||||||
has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list)
|
has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list)
|
||||||
last_box, last_index, last_bull = box, index, has_bull
|
last_box, last_index, last_bull = box, index, has_bull
|
||||||
line_pn = float(line_tag.lstrip('@@').split('\t')[0])
|
line_pn = get_float(line_tag.lstrip('@@').split('\t')[0])
|
||||||
line_top = float(line_tag.rstrip('##').split('\t')[3])
|
line_top = get_float(line_tag.rstrip('##').split('\t')[3])
|
||||||
tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index)
|
tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index)
|
||||||
if not has_bull: # No question bullet
|
if not has_bull: # No question bullet
|
||||||
if not last_q:
|
if not last_q:
|
||||||
|
@ -18,7 +18,7 @@ import re
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from rag.settings import TAG_FLD, PAGERANK_FLD
|
from rag.settings import TAG_FLD, PAGERANK_FLD
|
||||||
from rag.utils import rmSpace
|
from rag.utils import rmSpace, get_float
|
||||||
from rag.nlp import rag_tokenizer, query
|
from rag.nlp import rag_tokenizer, query
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
|
from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
|
||||||
@ -49,7 +49,7 @@ class Dealer:
|
|||||||
if len(shape) > 1:
|
if len(shape) > 1:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
|
f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
|
||||||
embedding_data = [float(v) for v in qv]
|
embedding_data = [get_float(v) for v in qv]
|
||||||
vector_column_name = f"q_{len(embedding_data)}_vec"
|
vector_column_name = f"q_{len(embedding_data)}_vec"
|
||||||
return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
|
return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
|
||||||
|
|
||||||
@ -153,7 +153,7 @@ class Dealer:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def trans2floats(txt):
|
def trans2floats(txt):
|
||||||
return [float(t) for t in txt.split("\t")]
|
return [get_float(t) for t in txt.split("\t")]
|
||||||
|
|
||||||
def insert_citations(self, answer, chunks, chunk_v,
|
def insert_citations(self, answer, chunks, chunk_v,
|
||||||
embd_mdl, tkweight=0.1, vtweight=0.9):
|
embd_mdl, tkweight=0.1, vtweight=0.9):
|
||||||
@ -282,7 +282,7 @@ class Dealer:
|
|||||||
for chunk_id in sres.ids:
|
for chunk_id in sres.ids:
|
||||||
vector = sres.field[chunk_id].get(vector_column, zero_vector)
|
vector = sres.field[chunk_id].get(vector_column, zero_vector)
|
||||||
if isinstance(vector, str):
|
if isinstance(vector, str):
|
||||||
vector = [float(v) for v in vector.split("\t")]
|
vector = [get_float(v) for v in vector.split("\t")]
|
||||||
ins_embd.append(vector)
|
ins_embd.append(vector)
|
||||||
if not ins_embd:
|
if not ins_embd:
|
||||||
return [], [], []
|
return [], [], []
|
||||||
|
@ -19,6 +19,7 @@ import re
|
|||||||
import tiktoken
|
import tiktoken
|
||||||
from api.utils.file_utils import get_project_base_directory
|
from api.utils.file_utils import get_project_base_directory
|
||||||
|
|
||||||
|
|
||||||
def singleton(cls, *args, **kw):
|
def singleton(cls, *args, **kw):
|
||||||
instances = {}
|
instances = {}
|
||||||
|
|
||||||
@ -89,3 +90,12 @@ def num_tokens_from_string(string: str) -> int:
|
|||||||
def truncate(string: str, max_len: int) -> str:
|
def truncate(string: str, max_len: int) -> str:
|
||||||
"""Returns truncated text if the length of text exceed max_len."""
|
"""Returns truncated text if the length of text exceed max_len."""
|
||||||
return encoder.decode(encoder.encode(string)[:max_len])
|
return encoder.decode(encoder.encode(string)[:max_len])
|
||||||
|
|
||||||
|
|
||||||
|
def get_float(v: object) -> float:
    """Convert ``v`` to a float, returning negative infinity on failure.

    This is a non-raising replacement for the built-in ``float()``: a value
    that is missing or cannot be converted yields ``float('-inf')`` instead
    of an exception.

    Args:
        v: The value to convert. Anything ``float()`` accepts (numeric
            strings, ints, floats, objects implementing ``__float__``) is
            converted normally; ``None`` and unconvertible values are
            tolerated.

    Returns:
        ``float(v)`` when the conversion succeeds, otherwise
        ``float('-inf')``.
    """
    fallback = float('-inf')
    if v is None:
        # Short-circuit the common "field absent" case without paying for
        # a raised-and-caught TypeError.
        return fallback
    try:
        return float(v)
    except Exception:
        # Deliberately broad: this helper is best-effort by contract, and
        # third-party ``__float__`` implementations may raise exception
        # types other than TypeError/ValueError.
        return fallback
|
@ -26,7 +26,7 @@ from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
|
|||||||
from elastic_transport import ConnectionTimeout
|
from elastic_transport import ConnectionTimeout
|
||||||
from rag import settings
|
from rag import settings
|
||||||
from rag.settings import TAG_FLD, PAGERANK_FLD
|
from rag.settings import TAG_FLD, PAGERANK_FLD
|
||||||
from rag.utils import singleton
|
from rag.utils import singleton, get_float
|
||||||
from api.utils.file_utils import get_project_base_directory
|
from api.utils.file_utils import get_project_base_directory
|
||||||
from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
|
from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
|
||||||
FusionExpr
|
FusionExpr
|
||||||
@ -178,7 +178,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
MatchDenseExpr) and isinstance(
|
MatchDenseExpr) and isinstance(
|
||||||
matchExprs[2], FusionExpr)
|
matchExprs[2], FusionExpr)
|
||||||
weights = m.fusion_params["weights"]
|
weights = m.fusion_params["weights"]
|
||||||
vector_similarity_weight = float(weights.split(",")[1])
|
vector_similarity_weight = get_float(weights.split(",")[1])
|
||||||
for m in matchExprs:
|
for m in matchExprs:
|
||||||
if isinstance(m, MatchTextExpr):
|
if isinstance(m, MatchTextExpr):
|
||||||
minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
|
minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user