mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 01:29:01 +08:00
dep: bump pandas from 1.x to 2.x (#4820)
This commit is contained in:
parent
9abeb99b32
commit
58db719a2c
@ -57,7 +57,7 @@ class CSVExtractor(BaseExtractor):
|
|||||||
docs = []
|
docs = []
|
||||||
try:
|
try:
|
||||||
# load csv file into pandas dataframe
|
# load csv file into pandas dataframe
|
||||||
df = pd.read_csv(csvfile, error_bad_lines=False, **self.csv_args)
|
df = pd.read_csv(csvfile, on_bad_lines='skip', **self.csv_args)
|
||||||
|
|
||||||
# check source column exists
|
# check source column exists
|
||||||
if self.source_column and self.source_column not in df.columns:
|
if self.source_column and self.source_column not in df.columns:
|
||||||
|
@ -26,7 +26,6 @@ sympy==1.12
|
|||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
celery~=5.3.6
|
celery~=5.3.6
|
||||||
redis[hiredis]~=5.0.3
|
redis[hiredis]~=5.0.3
|
||||||
openpyxl==3.1.2
|
|
||||||
chardet~=5.1.0
|
chardet~=5.1.0
|
||||||
python-docx~=1.1.0
|
python-docx~=1.1.0
|
||||||
pypdfium2~=4.17.0
|
pypdfium2~=4.17.0
|
||||||
@ -51,7 +50,7 @@ dashscope[tokenizer]~=1.17.0
|
|||||||
huggingface_hub~=0.16.4
|
huggingface_hub~=0.16.4
|
||||||
transformers~=4.35.0
|
transformers~=4.35.0
|
||||||
tokenizers~=0.15.0
|
tokenizers~=0.15.0
|
||||||
pandas==1.5.3
|
pandas[performance,excel]~=2.2.2
|
||||||
xinference-client==0.9.4
|
xinference-client==0.9.4
|
||||||
safetensors~=0.4.3
|
safetensors~=0.4.3
|
||||||
zhipuai==1.0.7
|
zhipuai==1.0.7
|
||||||
@ -78,7 +77,6 @@ qrcode~=7.4.2
|
|||||||
azure-storage-blob==12.13.0
|
azure-storage-blob==12.13.0
|
||||||
azure-identity==1.15.0
|
azure-identity==1.15.0
|
||||||
lxml==5.1.0
|
lxml==5.1.0
|
||||||
xlrd~=2.0.1
|
|
||||||
pydantic~=1.10.0
|
pydantic~=1.10.0
|
||||||
pgvecto-rs==0.1.4
|
pgvecto-rs==0.1.4
|
||||||
firecrawl-py==0.0.5
|
firecrawl-py==0.0.5
|
||||||
|
62
api/tests/unit_tests/libs/test_pandas.py
Normal file
62
api/tests/unit_tests/libs/test_pandas.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def test_pandas_csv(tmp_path, monkeypatch):
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
data = {'col1': [1, 2.2, -3.3, 4.0, 5],
|
||||||
|
'col2': ['A', 'B', 'C', 'D', 'E']}
|
||||||
|
df1 = pd.DataFrame(data)
|
||||||
|
|
||||||
|
# write to csv file
|
||||||
|
csv_file_path = tmp_path.joinpath('example.csv')
|
||||||
|
df1.to_csv(csv_file_path, index=False)
|
||||||
|
|
||||||
|
# read from csv file
|
||||||
|
df2 = pd.read_csv(csv_file_path, on_bad_lines='skip')
|
||||||
|
assert df2[df2.columns[0]].to_list() == data['col1']
|
||||||
|
assert df2[df2.columns[1]].to_list() == data['col2']
|
||||||
|
|
||||||
|
|
||||||
|
def test_pandas_xlsx(tmp_path, monkeypatch):
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
data = {'col1': [1, 2.2, -3.3, 4.0, 5],
|
||||||
|
'col2': ['A', 'B', 'C', 'D', 'E']}
|
||||||
|
df1 = pd.DataFrame(data)
|
||||||
|
|
||||||
|
# write to xlsx file
|
||||||
|
xlsx_file_path = tmp_path.joinpath('example.xlsx')
|
||||||
|
df1.to_excel(xlsx_file_path, index=False)
|
||||||
|
|
||||||
|
# read from xlsx file
|
||||||
|
df2 = pd.read_excel(xlsx_file_path)
|
||||||
|
assert df2[df2.columns[0]].to_list() == data['col1']
|
||||||
|
assert df2[df2.columns[1]].to_list() == data['col2']
|
||||||
|
|
||||||
|
|
||||||
|
def test_pandas_xlsx_with_sheets(tmp_path, monkeypatch):
|
||||||
|
monkeypatch.chdir(tmp_path)
|
||||||
|
data1 = {'col1': [1, 2, 3, 4, 5],
|
||||||
|
'col2': ['A', 'B', 'C', 'D', 'E']}
|
||||||
|
df1 = pd.DataFrame(data1)
|
||||||
|
|
||||||
|
data2 = {'col1': [6, 7, 8, 9, 10],
|
||||||
|
'col2': ['F', 'G', 'H', 'I', 'J']}
|
||||||
|
df2 = pd.DataFrame(data2)
|
||||||
|
|
||||||
|
# write to xlsx file with sheets
|
||||||
|
xlsx_file_path = tmp_path.joinpath('example_with_sheets.xlsx')
|
||||||
|
sheet1 = 'Sheet1'
|
||||||
|
sheet2 = 'Sheet2'
|
||||||
|
with pd.ExcelWriter(xlsx_file_path) as excel_writer:
|
||||||
|
df1.to_excel(excel_writer, sheet_name=sheet1, index=False)
|
||||||
|
df2.to_excel(excel_writer, sheet_name=sheet2, index=False)
|
||||||
|
|
||||||
|
# read from xlsx file with sheets
|
||||||
|
with pd.ExcelFile(xlsx_file_path) as excel_file:
|
||||||
|
df1 = pd.read_excel(excel_file, sheet_name=sheet1)
|
||||||
|
assert df1[df1.columns[0]].to_list() == data1['col1']
|
||||||
|
assert df1[df1.columns[1]].to_list() == data1['col2']
|
||||||
|
|
||||||
|
df2 = pd.read_excel(excel_file, sheet_name=sheet2)
|
||||||
|
assert df2[df2.columns[0]].to_list() == data2['col1']
|
||||||
|
assert df2[df2.columns[1]].to_list() == data2['col2']
|
Loading…
x
Reference in New Issue
Block a user