Fix: Add title_tks for Pictures (#7365)

### What problem does this PR solve?
https://github.com/infiniflow/ragflow/issues/7362

append title_tks
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Stephen Hu 2025-04-28 13:35:34 +08:00 committed by GitHub
parent 23dcbc94ef
commit 1a5608d0f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -15,6 +15,7 @@
#
import io
import re
import numpy as np
from PIL import Image
@ -24,6 +25,8 @@ from api.db.services.llm_service import LLMBundle
from deepdoc.vision import OCR
from rag.nlp import tokenize
from rag.utils import clean_markdown_block
from rag.nlp import rag_tokenizer
ocr = OCR()
@ -32,6 +35,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
img = Image.open(io.BytesIO(binary)).convert('RGB')
doc = {
"docnm_kwd": filename,
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
"image": img
}
bxs = ocr(np.array(img))