mirror of
https://git-proxy.hk.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-10 15:50:31 +08:00
Add test for document (#3548)
### What problem does this PR solve? Add test for document ### Type of change - [x] New Feature (non-breaking change which adds functionality) Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
parent
0ac6dc8f8c
commit
c7c8b3812f
@ -115,6 +115,7 @@ def upload(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR
|
||||
)
|
||||
'''
|
||||
# total size
|
||||
total_size = 0
|
||||
for file_obj in file_objs:
|
||||
@ -127,6 +128,7 @@ def upload(dataset_id, tenant_id):
|
||||
message=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)",
|
||||
code=settings.RetCode.ARGUMENT_ERROR,
|
||||
)
|
||||
'''
|
||||
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
||||
if not e:
|
||||
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
||||
|
@ -1,6 +1,6 @@
|
||||
from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
|
||||
from ragflow_sdk import RAGFlow
|
||||
from common import HOST_ADDRESS
|
||||
|
||||
import pytest
|
||||
|
||||
def test_upload_document_with_success(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
@ -48,7 +48,6 @@ def test_list_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
ds.list_documents(keywords="test", page=1, page_size=12)
|
||||
|
||||
|
||||
|
||||
def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
@ -59,4 +58,109 @@ def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
||||
docs = ds.upload_documents(document_infos)
|
||||
ds.delete_documents([docs[0].id])
|
||||
|
||||
# Upload and parse documents of different file types with the general parse method.
|
||||
def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.pdf to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_pdf_document")
    # The path is relative to the current working directory.
    with open("test_data/test.pdf", "rb") as pdf_file:
        payload = pdf_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.pdf", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
|
||||
def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.docx to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_docx_document")
    # The path is relative to the current working directory.
    with open("test_data/test.docx", "rb") as docx_file:
        payload = docx_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.docx", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.xlsx to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_excel_document")
    # The path is relative to the current working directory.
    with open("test_data/test.xlsx", "rb") as xlsx_file:
        payload = xlsx_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.xlsx", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.ppt to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_ppt_document")
    # The path is relative to the current working directory.
    with open("test_data/test.ppt", "rb") as ppt_file:
        payload = ppt_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.ppt", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.jpg to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_image_document")
    # The path is relative to the current working directory.
    with open("test_data/test.jpg", "rb") as image_file:
        payload = image_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.jpg", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.txt to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_txt_document")
    # Read as bytes ("rb"); the path is relative to the current working directory.
    with open("test_data/test.txt", "rb") as txt_file:
        payload = txt_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.txt", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.md to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_md_document")
    # Read as bytes ("rb"); the path is relative to the current working directory.
    with open("test_data/test.md", "rb") as md_file:
        payload = md_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.md", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
|
||||
def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.json to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_json_document")
    # Read as bytes ("rb"); the path is relative to the current working directory.
    with open("test_data/test.json", "rb") as json_file:
        payload = json_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.json", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
|
||||
@pytest.mark.skip(reason="")
def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.eml to a new dataset and submit it for parsing.

    The test is skipped unconditionally via the decorator, which gives an
    empty reason. It makes no assertions: if enabled, it would pass as long
    as none of the SDK calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_eml_document")
    # The path is relative to the current working directory.
    with open("test_data/test.eml", "rb") as eml_file:
        payload = eml_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.eml", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
||||
|
||||
def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
    """Upload test_data/test.html to a new dataset and submit it for parsing.

    The test makes no assertions: it passes as long as none of the SDK
    calls raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_html_document")
    # Read as bytes ("rb"); the path is relative to the current working directory.
    with open("test_data/test.html", "rb") as html_file:
        payload = html_file.read()
    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.html", "blob": payload}]
    )
    dataset.async_parse_documents([uploaded[0].id])
|
@ -1,2 +0,0 @@
|
||||
hhh
|
||||
hhh
|
@ -1,3 +0,0 @@
|
||||
llll
|
||||
ooooo
|
||||
llll
|
@ -1,8 +0,0 @@
|
||||
Once upon a time, in a small village nestled at the foot of a towering mountain, lived a young girl named Lily. Lily had a heart as pure as the mountain's snowcaps and a spirit as adventurous as the winding trails that led to its peak.
|
||||
One day, as Lily was gathering berries at the forest's edge, she stumbled upon an old, weathered map hidden beneath a fallen tree. The map was covered in strange symbols and a single, glowing word: "Treasure." Curiosity piqued, Lily decided to embark on a quest to uncover the mystery of the treasure.
|
||||
With nothing more than her trusty basket of berries, a few pieces of bread, and the map, Lily set off into the unknown. As she climbed higher and higher into the mountains, the air grew crisp, and the scenery transformed into a breathtaking tapestry of lush greenery and sparkling streams.
|
||||
Along the way, Lily encountered all sorts of challenges. She had to navigate treacherous rivers using fallen logs as bridges, climb steep cliffs with nothing but her agility and determination, and even outsmart a mischievous pack of foxes that tried to lead her astray. But through it all, Lily remained steadfast, her heart filled with hope and a sense of purpose.
|
||||
Finally, after what seemed like an eternity of trekking, Lily arrived at a hidden valley. At its center stood an ancient tree, its roots entwined with glittering jewels and a chest made of pure gold. This, the map had revealed, was the source of the treasure.
|
||||
But as Lily approached the chest, she realized that the true treasure was not the riches before her. It was the journey itself—the friendships she had forged with the animals she encountered, the strength she had gained from overcoming obstacles, and the sense of wonder and discovery that filled her heart.
|
||||
With a smile on her face, Lily gently closed the chest and left it where it was, content in the knowledge that the greatest treasures in life are not always found in gold or jewels. She turned back towards home, her heart full of stories to share and a spirit that had been forever changed by her adventure.
|
||||
And so, Lily returned to her village, a hero in her own right, with a tale that would be whispered around firesides for generations to come.
|
BIN
sdk/python/test/test_data/test.docx
Normal file
BIN
sdk/python/test/test_data/test.docx
Normal file
Binary file not shown.
148
sdk/python/test/test_data/test.html
Normal file
148
sdk/python/test/test_data/test.html
Normal file
File diff suppressed because one or more lines are too long
BIN
sdk/python/test/test_data/test.jpg
Normal file
BIN
sdk/python/test/test_data/test.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 87 KiB |
107
sdk/python/test/test_data/test.json
Normal file
107
sdk/python/test/test_data/test.json
Normal file
@ -0,0 +1,107 @@
|
||||
{
|
||||
"单车": [
|
||||
"自行车"
|
||||
],
|
||||
"青禾服装": [
|
||||
"青禾服饰"
|
||||
],
|
||||
"救济灾民": [
|
||||
"救助",
|
||||
"灾民救济",
|
||||
"赈济"
|
||||
],
|
||||
"左移": [],
|
||||
"低速": [],
|
||||
"雨果网": [],
|
||||
"钢小二": [
|
||||
"成立于2013年,位于江苏省无锡市,是一家以从事研究和试验发展为主的企业"
|
||||
],
|
||||
"第五项": [
|
||||
"5项"
|
||||
],
|
||||
"铸排机": [
|
||||
"机排",
|
||||
"排铸机",
|
||||
"排铸"
|
||||
],
|
||||
"金淳高分子": [],
|
||||
"麦门冬汤": [],
|
||||
"错位": [],
|
||||
"佰特吉姆": [],
|
||||
"楼体": [],
|
||||
"展美科技": [
|
||||
"美展"
|
||||
],
|
||||
"中寮": [],
|
||||
"贪官汙吏": [
|
||||
"...",
|
||||
"贪吏",
|
||||
"贪官污吏"
|
||||
],
|
||||
"掩蔽部": [
|
||||
"掩 蔽 部"
|
||||
],
|
||||
"海宏智能": [],
|
||||
"中寰": [],
|
||||
"万次": [],
|
||||
"领星资本": [
|
||||
"星领"
|
||||
],
|
||||
"肯讯": [],
|
||||
"坎肩": [],
|
||||
"爱农人": [],
|
||||
"易美餐": [],
|
||||
"寸丝半粟": [],
|
||||
"罗丹萍": [],
|
||||
"转导物": [],
|
||||
"泊寓": [],
|
||||
"万欧": [
|
||||
"欧万"
|
||||
],
|
||||
"友聚惠": [
|
||||
"友惠",
|
||||
"惠友"
|
||||
],
|
||||
"舞牙弄爪": [
|
||||
":形容凶猛的样子,比喻威胁、恐吓",
|
||||
"原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子",
|
||||
"成语解释:原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子",
|
||||
"原形容猛兽的凶相,后常用来比喻猖狂(好工具hao86.com",
|
||||
"牙舞爪",
|
||||
"形容猛兽凶恶可怕。也比喻猖狂凶恶",
|
||||
"舞爪"
|
||||
],
|
||||
"上海致上": [
|
||||
"上海上",
|
||||
"上海市"
|
||||
],
|
||||
"迪因加": [],
|
||||
"李正茂": [],
|
||||
"君来投": [],
|
||||
"双掌空": [
|
||||
"双掌 空",
|
||||
"空掌",
|
||||
"两手空空"
|
||||
],
|
||||
"浩石": [
|
||||
"石浩",
|
||||
"皓石"
|
||||
],
|
||||
"云阅文学": [],
|
||||
"阿斯帕": [],
|
||||
"中导": [],
|
||||
"以诚相待": [],
|
||||
"中融金服": [],
|
||||
"尚股网": [],
|
||||
"叶立钦": [
|
||||
"叶利钦"
|
||||
],
|
||||
"新信钱包": [
|
||||
"信信"
|
||||
],
|
||||
"赛苏投资": [
|
||||
"投资者"
|
||||
],
|
||||
"售价": [],
|
||||
"帮医网": []
|
||||
}
|
21
sdk/python/test/test_data/test.md
Normal file
21
sdk/python/test/test_data/test.md
Normal file
@ -0,0 +1,21 @@
|
||||
Quod equidem non reprehendo;
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus?
|
||||
|
||||
Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat?
|
||||
|
||||
Quis istum dolorem timet?
|
||||
Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio.
|
||||
|
||||
Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia.
|
||||
Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas?
|
||||
|
||||
Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit.
|
||||
Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant.
|
||||
Esse enim quam vellet iniquus iustus poterat inpune.
|
||||
Quae autem natura suae primae institutionis oblita est?
|
||||
Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt;
|
||||
Hoc est non modo cor non habere, sed ne palatum quidem.
|
||||
Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates?
|
||||
|
||||
Idemne, quod iucunde?
|
||||
Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu?
|
BIN
sdk/python/test/test_data/test.pdf
Normal file
BIN
sdk/python/test/test_data/test.pdf
Normal file
Binary file not shown.
BIN
sdk/python/test/test_data/test.ppt
Normal file
BIN
sdk/python/test/test_data/test.ppt
Normal file
Binary file not shown.
@ -1,3 +1,21 @@
|
||||
test
|
||||
test
|
||||
test
|
||||
Quod equidem non reprehendo;
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus?
|
||||
|
||||
Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat?
|
||||
|
||||
Quis istum dolorem timet?
|
||||
Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio.
|
||||
|
||||
Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia.
|
||||
Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas?
|
||||
|
||||
Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit.
|
||||
Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant.
|
||||
Esse enim quam vellet iniquus iustus poterat inpune.
|
||||
Quae autem natura suae primae institutionis oblita est?
|
||||
Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt;
|
||||
Hoc est non modo cor non habere, sed ne palatum quidem.
|
||||
Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates?
|
||||
|
||||
Idemne, quod iucunde?
|
||||
Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu?
|
BIN
sdk/python/test/test_data/test.xlsx
Normal file
BIN
sdk/python/test/test_data/test.xlsx
Normal file
Binary file not shown.
@ -1,4 +0,0 @@
|
||||
test1
|
||||
test1
|
||||
aaaa document args arg
|
||||
rag document
|
@ -1,4 +0,0 @@
|
||||
test22
|
||||
test22
|
||||
aaaa document args arg
|
||||
rag document
|
@ -1,4 +0,0 @@
|
||||
test3
|
||||
test333
|
||||
aaaa document args arg
|
||||
rag document
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user