mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-05 11:20:39 +08:00
Fix: Add title_tks for Pictures (#7365)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/7362

Append `title_tks`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
23dcbc94ef
commit
1a5608d0f8
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
import io
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
@@ -24,6 +25,8 @@ from api.db.services.llm_service import LLMBundle
|
||||
from deepdoc.vision import OCR
|
||||
from rag.nlp import tokenize
|
||||
from rag.utils import clean_markdown_block
|
||||
from rag.nlp import rag_tokenizer
|
||||
|
||||
|
||||
ocr = OCR()
|
||||
|
||||
@@ -32,6 +35,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
|
||||
img = Image.open(io.BytesIO(binary)).convert('RGB')
|
||||
doc = {
|
||||
"docnm_kwd": filename,
|
||||
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
|
||||
"image": img
|
||||
}
|
||||
bxs = ocr(np.array(img))
|
||||
|
Loading…
x
Reference in New Issue
Block a user