mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-05 15:00:43 +08:00
Fix: Add title_tks for Pictures (#7365)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/7362

Append `title_tks`.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
23dcbc94ef
commit
1a5608d0f8
@ -15,6 +15,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
import re
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@ -24,6 +25,8 @@ from api.db.services.llm_service import LLMBundle
|
|||||||
from deepdoc.vision import OCR
|
from deepdoc.vision import OCR
|
||||||
from rag.nlp import tokenize
|
from rag.nlp import tokenize
|
||||||
from rag.utils import clean_markdown_block
|
from rag.utils import clean_markdown_block
|
||||||
|
from rag.nlp import rag_tokenizer
|
||||||
|
|
||||||
|
|
||||||
ocr = OCR()
|
ocr = OCR()
|
||||||
|
|
||||||
@ -32,6 +35,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
|
|||||||
img = Image.open(io.BytesIO(binary)).convert('RGB')
|
img = Image.open(io.BytesIO(binary)).convert('RGB')
|
||||||
doc = {
|
doc = {
|
||||||
"docnm_kwd": filename,
|
"docnm_kwd": filename,
|
||||||
|
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
|
||||||
"image": img
|
"image": img
|
||||||
}
|
}
|
||||||
bxs = ocr(np.array(img))
|
bxs = ocr(np.array(img))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user