Fix: float transfer exception. (#6197)

### What problem does this PR solve?

#6177

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Kevin Hu 2025-03-18 11:13:44 +08:00 committed by GitHub
parent 222a2c8fa5
commit 1333d3c02a
5 changed files with 27 additions and 15 deletions


@@ -24,7 +24,7 @@ import trio
from api.utils import get_uuid
from graphrag.query_analyze_prompt import PROMPTS
from graphrag.utils import get_entity_type2sampels, get_llm_cache, set_llm_cache, get_relation
-from rag.utils import num_tokens_from_string
+from rag.utils import num_tokens_from_string, get_float
from rag.utils.doc_store_conn import OrderByExpr
from rag.nlp.search import Dealer, index_name
@@ -72,13 +72,13 @@ class KGSearch(Dealer):
for f in flds:
if f in ent and ent[f] is None:
del ent[f]
-if float(ent.get("_score", 0)) < sim_thr:
+if get_float(ent.get("_score", 0)) < sim_thr:
continue
if isinstance(ent["entity_kwd"], list):
ent["entity_kwd"] = ent["entity_kwd"][0]
res[ent["entity_kwd"]] = {
"sim": float(ent.get("_score", 0)),
"pagerank": float(ent.get("rank_flt", 0)),
"sim": get_float(ent.get("_score", 0)),
"pagerank": get_float(ent.get("rank_flt", 0)),
"n_hop_ents": json.loads(ent.get("n_hop_with_weight", "[]")),
"description": ent.get("content_with_weight", "{}")
}
@@ -89,7 +89,7 @@ class KGSearch(Dealer):
es_res = self.dataStore.getFields(es_res, ["content_with_weight", "_score", "from_entity_kwd", "to_entity_kwd",
"weight_int"])
for _, ent in es_res.items():
-if float(ent["_score"]) < sim_thr:
+if get_float(ent["_score"]) < sim_thr:
continue
f, t = sorted([ent["from_entity_kwd"], ent["to_entity_kwd"]])
if isinstance(f, list):
@@ -97,8 +97,8 @@ class KGSearch(Dealer):
if isinstance(t, list):
t = t[0]
res[(f, t)] = {
"sim": float(ent["_score"]),
"pagerank": float(ent.get("weight_int", 0)),
"sim": get_float(ent["_score"]),
"pagerank": get_float(ent.get("weight_int", 0)),
"description": ent["content_with_weight"]
}
return res
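
The first file moves the entity and relation scoring in `KGSearch` from bare `float()` casts to the new `get_float()` helper. A minimal sketch of the failure mode being guarded against (the sample hit and threshold are invented):

```python
# Inlined copy of the get_float() helper this PR adds to rag/utils.
def get_float(v):
    if v is None:
        return float('-inf')
    try:
        return float(v)
    except Exception:
        return float('-inf')

# A hit whose "_score" comes back as None or as a non-numeric string used to
# blow up the retrieval path: float(None) raises TypeError, float("n/a") ValueError.
ent = {"entity_kwd": "RAGFlow", "_score": None}

sim_thr = 0.3
# With get_float() the bad score degrades to -inf and the entity is simply
# filtered out by the similarity threshold instead of raising.
print(get_float(ent.get("_score", 0)) < sim_thr)  # True -> skipped, no exception
```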


@@ -30,6 +30,8 @@ from docx import Document
from PIL import Image
from markdown import markdown
+from rag.utils import get_float
class Excel(ExcelParser):
def __call__(self, fnm, binary=None, callback=None):
@@ -126,8 +128,8 @@ class Pdf(PdfParser):
section, line_tag = box['text'], self._line_tag(box, zoomin)
has_bull, index = has_qbullet(reg, box, last_box, last_index, last_bull, bull_x0_list)
last_box, last_index, last_bull = box, index, has_bull
-line_pn = float(line_tag.lstrip('@@').split('\t')[0])
-line_top = float(line_tag.rstrip('##').split('\t')[3])
+line_pn = get_float(line_tag.lstrip('@@').split('\t')[0])
+line_top = get_float(line_tag.rstrip('##').split('\t')[3])
tbl_pn, tbl_left, tbl_right, tbl_top, tbl_bottom, tbl_tag, tbl_text = self.get_tbls_info(tbls, tbl_index)
if not has_bull: # No question bullet
if not last_q:
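
The Q&A PDF parser reads the page number and top coordinate out of a positional line tag. A rough sketch of the guarded parse, assuming the tag has the shape `@@<page>\t<x0>\t<x1>\t<top>\t<bottom>##`; the sample tag and its odd first field are invented:

```python
from rag.utils import get_float  # helper added in this PR

# Invented tag whose first field is not a plain number -- the kind of value the
# old bare float() call would choke on with a ValueError.
line_tag = "@@3-4\t56.0\t520.4\t112.7\t130.2##"

fields = line_tag.lstrip('@@').rstrip('##').split('\t')
line_pn = get_float(fields[0])   # -inf instead of ValueError on "3-4"
line_top = get_float(fields[3])  # 112.7
print(line_pn, line_top)
```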


@@ -18,7 +18,7 @@ import re
from dataclasses import dataclass
from rag.settings import TAG_FLD, PAGERANK_FLD
-from rag.utils import rmSpace
+from rag.utils import rmSpace, get_float
from rag.nlp import rag_tokenizer, query
import numpy as np
from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
@@ -49,7 +49,7 @@ class Dealer:
if len(shape) > 1:
raise Exception(
f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).")
-embedding_data = [float(v) for v in qv]
+embedding_data = [get_float(v) for v in qv]
vector_column_name = f"q_{len(embedding_data)}_vec"
return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
@@ -153,7 +153,7 @@ class Dealer:
@staticmethod
def trans2floats(txt):
-return [float(t) for t in txt.split("\t")]
+return [get_float(t) for t in txt.split("\t")]
def insert_citations(self, answer, chunks, chunk_v,
embd_mdl, tkweight=0.1, vtweight=0.9):
@@ -282,7 +282,7 @@ class Dealer:
for chunk_id in sres.ids:
vector = sres.field[chunk_id].get(vector_column, zero_vector)
if isinstance(vector, str):
-vector = [float(v) for v in vector.split("\t")]
+vector = [get_float(v) for v in vector.split("\t")]
ins_embd.append(vector)
if not ins_embd:
return [], [], []
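
`Dealer` handles embeddings that are stored and shipped as tab-separated strings (`get_vector`, `trans2floats`, and the re-ranking path above). A short sketch of the new behaviour on a corrupted element; the vector string is made up:

```python
from rag.utils import get_float  # helper added in this PR

# An empty or non-numeric element used to abort the whole query with ValueError;
# with get_float it collapses to -inf and the rest of the vector survives.
raw = "0.12\t0.98\t\t0.05"
vector = [get_float(v) for v in raw.split("\t")]
print(vector)  # [0.12, 0.98, -inf, 0.05]
```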


@@ -19,6 +19,7 @@ import re
import tiktoken
from api.utils.file_utils import get_project_base_directory
def singleton(cls, *args, **kw):
instances = {}
@@ -89,3 +90,12 @@ def num_tokens_from_string(string: str) -> int:
def truncate(string: str, max_len: int) -> str:
"""Returns truncated text if the length of text exceed max_len."""
return encoder.decode(encoder.encode(string)[:max_len])
+def get_float(v: str | None):
+    if v is None:
+        return float('-inf')
+    try:
+        return float(v)
+    except Exception:
+        return float('-inf')
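
The new helper in `rag/utils` is the core of the fix: it never raises, mapping anything `float()` cannot parse to `-inf`, which sorts below any real score or threshold. A few illustrative calls:

```python
from rag.utils import get_float

print(get_float("0.83"))  # 0.83
print(get_float(None))    # -inf
print(get_float("N/A"))   # -inf (the ValueError is swallowed)
print(get_float(3))       # 3.0  (numbers pass through float() despite the `str | None` hint)
```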


@@ -26,7 +26,7 @@ from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
from elastic_transport import ConnectionTimeout
from rag import settings
from rag.settings import TAG_FLD, PAGERANK_FLD
-from rag.utils import singleton
+from rag.utils import singleton, get_float
from api.utils.file_utils import get_project_base_directory
from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, \
FusionExpr
@@ -178,7 +178,7 @@ class ESConnection(DocStoreConnection):
MatchDenseExpr) and isinstance(
matchExprs[2], FusionExpr)
weights = m.fusion_params["weights"]
-vector_similarity_weight = float(weights.split(",")[1])
+vector_similarity_weight = get_float(weights.split(",")[1])
for m in matchExprs:
if isinstance(m, MatchTextExpr):
minimum_should_match = m.extra_options.get("minimum_should_match", 0.0)
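
Lastly, the Elasticsearch connector pulls the dense-vector weight out of a comma-separated fusion-weights string. A small sketch, assuming the weights look like `"0.05,0.95"`; the malformed variant is invented:

```python
from rag.utils import get_float  # helper added in this PR

weights = "0.05,0.95"
print(get_float(weights.split(",")[1]))  # 0.95

# A malformed second entry now degrades to -inf instead of raising in the query builder.
broken = "0.05,"
print(get_float(broken.split(",")[1]))   # -inf
```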