mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-15 16:05:53 +08:00
Supports obtaining PDF documents from web pages (#1107)
### What problem does this PR solve?

Knowledge base management now supports crawling a web page and saving it as a PDF document.

### Type of change

- [x] New Feature (support creating documents from web pages)
This commit is contained in:
parent
68a698655a
commit
7eb69fe6d9
@ -39,6 +39,7 @@ from api.settings import RetCode
|
|||||||
from api.utils.api_utils import get_json_result
|
from api.utils.api_utils import get_json_result
|
||||||
from rag.utils.minio_conn import MINIO
|
from rag.utils.minio_conn import MINIO
|
||||||
from api.utils.file_utils import filename_type, thumbnail
|
from api.utils.file_utils import filename_type, thumbnail
|
||||||
|
from api.utils.web_utils import html2pdf, is_valid_url
|
||||||
|
|
||||||
|
|
||||||
@manager.route('/upload', methods=['POST'])
|
@manager.route('/upload', methods=['POST'])
|
||||||
@ -289,7 +290,7 @@ def run():
|
|||||||
return get_data_error_result(retmsg="Tenant not found!")
|
return get_data_error_result(retmsg="Tenant not found!")
|
||||||
ELASTICSEARCH.deleteByQuery(
|
ELASTICSEARCH.deleteByQuery(
|
||||||
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
||||||
|
|
||||||
if str(req["run"]) == TaskStatus.RUNNING.value:
|
if str(req["run"]) == TaskStatus.RUNNING.value:
|
||||||
TaskService.filter_delete([Task.doc_id == id])
|
TaskService.filter_delete([Task.doc_id == id])
|
||||||
e, doc = DocumentService.get_by_id(id)
|
e, doc = DocumentService.get_by_id(id)
|
||||||
@ -416,3 +417,69 @@ def get_image(image_id):
|
|||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|
||||||
|
|
||||||
|
@manager.route('/web_crawl', methods=['POST'])
@login_required
def web_crawl():
    """Render a web page to PDF and register it as a knowledge base document.

    Reads ``kb_id``, ``name`` and ``url`` from the submitted form, converts
    the page at ``url`` with ``html2pdf``, stores the resulting bytes through
    ``MINIO.put`` and inserts a document record named ``<name>.pdf`` (made
    unique by ``duplicate_name``) that is linked into the knowledge base
    folder of the current user.

    Returns:
        ``get_json_result(data=True)`` on success. A JSON result with
        ``RetCode.ARGUMENT_ERROR`` when a form field is missing or the URL is
        rejected by ``is_valid_url``. A JSON result with
        ``RetCode.SERVER_ERROR`` and the error text when rendering or storing
        the document fails.

    Raises:
        LookupError: if no knowledge base is found for ``kb_id``.
    """
    kb_id = request.form.get("kb_id")
    if not kb_id:
        return get_json_result(
            data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
    name = request.form.get("name")
    url = request.form.get("url")
    if not name:
        return get_json_result(
            data=False, retmsg='Lack of "name"', retcode=RetCode.ARGUMENT_ERROR)
    if not url:
        return get_json_result(
            data=False, retmsg='Lack of "url"', retcode=RetCode.ARGUMENT_ERROR)
    if not is_valid_url(url):
        return get_json_result(
            data=False, retmsg='The URL format is invalid', retcode=RetCode.ARGUMENT_ERROR)
    e, kb = KnowledgebaseService.get_by_id(kb_id)
    if not e:
        raise LookupError("Can't find this knowledgebase!")

    # Make sure the user's folder hierarchy for knowledge base files exists.
    root_folder = FileService.get_root_folder(current_user.id)
    pf_id = root_folder["id"]
    FileService.init_knowledgebase_docs(pf_id, current_user.id)
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])

    try:
        filename = duplicate_name(
            DocumentService.query,
            name=name + ".pdf",
            kb_id=kb.id)
        filetype = filename_type(filename)
        if filetype == FileType.OTHER.value:
            raise RuntimeError("This type of file has not been supported yet!")

        # Render before picking the storage key: the conversion is slow, and
        # doing it between the existence probe and the upload would widen the
        # window in which another request can claim the same key.
        blob = html2pdf(url)
        if not blob:
            raise RuntimeError("Failed to convert the web page to PDF!")

        location = filename
        while MINIO.obj_exist(kb_id, location):
            location += "_"
        MINIO.put(kb_id, location, blob)

        doc = {
            "id": get_uuid(),
            "kb_id": kb.id,
            "parser_id": kb.parser_id,
            "parser_config": kb.parser_config,
            "created_by": current_user.id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
            "thumbnail": thumbnail(filename, blob)
        }
        # Compare against the enum *value*, as done for FileType.OTHER above.
        if doc["type"] == FileType.VISUAL.value:
            doc["parser_id"] = ParserType.PICTURE.value
        if re.search(r"\.(ppt|pptx|pages)$", filename):
            doc["parser_id"] = ParserType.PRESENTATION.value
        DocumentService.insert(doc)
        FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
    except Exception as err:
        # The message must be a string: an exception instance cannot be
        # serialised into the JSON response.
        return get_json_result(
            data=False, retmsg=str(err), retcode=RetCode.SERVER_ERROR)
    return get_json_result(data=True)
|
||||||
|
82
api/utils/web_utils.py
Normal file
82
api/utils/web_utils.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.webdriver.chrome.service import Service
|
||||||
|
from selenium.common.exceptions import TimeoutException
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support.expected_conditions import staleness_of
|
||||||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
|
||||||
|
def html2pdf(
    source: str,
    timeout: int = 2,
    install_driver: bool = True,
    print_options: "dict | None" = None,
):
    """Render the page at ``source`` to PDF with headless Chrome.

    Args:
        source: URL handed to the browser.
        timeout: Seconds passed to the page wait in ``__get_pdf_from_html``.
        install_driver: When true, chromedriver is obtained through
            ``ChromeDriverManager().install()``; otherwise the driver is
            started with Selenium's defaults.
        print_options: Extra ``Page.printToPDF`` parameters merged over the
            defaults. ``None`` (the default) means no overrides.

    Returns:
        Whatever ``__get_pdf_from_html`` returns for the page (the decoded
        PDF data).
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    options = {} if print_options is None else print_options
    return __get_pdf_from_html(source, timeout, install_driver, options)
|
||||||
|
|
||||||
|
|
||||||
|
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command to the driver's session and return its value.

    The command is POSTed to the session's
    ``chromium/send_command_and_get_result`` endpoint using the driver's
    command executor.

    Args:
        driver: Selenium driver whose ``session_id`` and ``command_executor``
            are used for the request.
        cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
        params: Command parameters; ``None`` is sent as an empty object.

    Returns:
        The ``"value"`` entry of the response.

    Raises:
        Exception: if the request yields no response, or the response carries
            a non-zero ``status``.
    """
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({"cmd": cmd, "params": {} if params is None else params})
    response = driver.command_executor._request("POST", url, body)

    if not response:
        # Nothing to read a message from; calling .get() on an empty/None
        # response would either fail or produce an empty error.
        raise Exception("DevTools command %r returned no response" % cmd)
    # NOTE(review): a non-zero "status" is assumed to mark a failed command
    # (the executor reports HTTP errors that way) — confirm against the
    # pinned Selenium version.
    if response.get("status"):
        raise Exception(response.get("value"))

    return response.get("value")
|
||||||
|
|
||||||
|
|
||||||
|
def __get_pdf_from_html(
    path: str,
    timeout: int,
    install_driver: bool,
    print_options: dict
):
    """Open ``path`` in headless Chrome and return the page printed as PDF.

    Args:
        path: URL loaded with ``driver.get``.
        timeout: Seconds to wait on the initial ``<html>`` element before
            printing.
        install_driver: When true, chromedriver is obtained through
            ``ChromeDriverManager().install()``.
        print_options: ``Page.printToPDF`` parameters that override the
            defaults below; may be empty or ``None``.

    Returns:
        The PDF content as bytes (base64-decoded ``data`` of the DevTools
        result).
    """
    webdriver_options = Options()
    for flag in ("--headless", "--disable-gpu", "--no-sandbox",
                 "--disable-dev-shm-usage"):
        webdriver_options.add_argument(flag)
    # NOTE(review): content setting 2 presumably blocks image loading —
    # confirm against Chrome's preference documentation.
    webdriver_options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
    }

    if install_driver:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=webdriver_options)
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # Everything after the browser has started runs under try/finally so the
    # Chrome process is always shut down, even when loading or printing fails.
    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
            )
        except TimeoutException:
            # Usual case: the document stayed in place for the whole wait.
            # If the element did go stale the wait simply ends early; either
            # way the current page is printed below instead of returning
            # nothing.
            pass

        calculated_print_options = {
            "landscape": False,
            "displayHeaderFooter": False,
            "printBackground": True,
            "preferCSSPageSize": True,
        }
        calculated_print_options.update(print_options or {})
        result = __send_devtools(
            driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result["data"])
    finally:
        driver.quit()
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_url(url: str) -> bool:
    """Return True if ``url`` is a well-formed http(s) or ftp URL.

    The whole string must match, so input with trailing characters outside
    the allowed set (spaces, quotes, angle brackets, ...) is rejected rather
    than accepted on the strength of a valid prefix. Non-string input is
    rejected.

    ``file://`` URLs are refused on purpose: the URL comes from the client and
    is opened by a browser running on the server, so a ``file://`` URL would
    let a caller render local server files into a document.
    NOTE(review): http(s) URLs pointing at internal hosts are still accepted.
    """
    if not isinstance(url, str):
        return False
    return bool(re.fullmatch(
        r"(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]",
        url))
|
||||||
|
|
||||||
|
|
@ -138,4 +138,6 @@ umap-learn
|
|||||||
fasttext==0.9.2
|
fasttext==0.9.2
|
||||||
volcengine==1.0.141
|
volcengine==1.0.141
|
||||||
readability-lxml==0.8.1
|
readability-lxml==0.8.1
|
||||||
html_text==0.6.2
|
html_text==0.6.2
|
||||||
|
selenium==4.21.0
|
||||||
|
webdriver-manager==4.0.1
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
import { IChunk, IKnowledgeFile } from '@/interfaces/database/knowledge';
|
import {IChunk, IKnowledgeFile} from '@/interfaces/database/knowledge';
|
||||||
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
|
import {IChangeParserConfigRequestBody} from '@/interfaces/request/document';
|
||||||
import { api_host } from '@/utils/api';
|
import {api_host} from '@/utils/api';
|
||||||
import { buildChunkHighlights } from '@/utils/documentUtils';
|
import {buildChunkHighlights} from '@/utils/documentUtils';
|
||||||
import { UploadFile } from 'antd';
|
import {UploadFile} from 'antd';
|
||||||
import { useCallback, useMemo, useState } from 'react';
|
import {useCallback, useMemo, useState} from 'react';
|
||||||
import { IHighlight } from 'react-pdf-highlighter';
|
import {IHighlight} from 'react-pdf-highlighter';
|
||||||
import { useDispatch, useSelector } from 'umi';
|
import {useDispatch, useSelector} from 'umi';
|
||||||
import { useGetKnowledgeSearchParams } from './routeHook';
|
import {useGetKnowledgeSearchParams} from './routeHook';
|
||||||
import { useOneNamespaceEffectsLoading } from './storeHooks';
|
import {useOneNamespaceEffectsLoading} from './storeHooks';
|
||||||
|
|
||||||
export const useGetDocumentUrl = (documentId?: string) => {
|
export const useGetDocumentUrl = (documentId?: string) => {
|
||||||
const getDocumentUrl = useCallback(
|
const getDocumentUrl = useCallback(
|
||||||
@ -207,6 +207,28 @@ export const useUploadDocument = () => {
|
|||||||
return uploadDocument;
|
return uploadDocument;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Returns a callback that dispatches the `kFModel/web_crawl` effect for the
 * knowledge base taken from the current route's search params.
 *
 * The callback takes the document name and the page URL and returns whatever
 * `dispatch` returns for the effect.
 */
export const useWebCrawl = () => {
  const dispatch = useDispatch();
  const { knowledgeId } = useGetKnowledgeSearchParams();

  return useCallback(
    (name: string, url: string) => {
      try {
        return dispatch<any>({
          type: 'kFModel/web_crawl',
          payload: {
            name,
            url,
            kb_id: knowledgeId,
          },
        });
      } catch (errorInfo) {
        console.log('Failed:', errorInfo);
      }
    },
    // knowledgeId must be a dependency: otherwise the callback keeps sending
    // the id captured on first render after the route changes.
    [dispatch, knowledgeId],
  );
};
|
||||||
|
|
||||||
export const useRunDocument = () => {
|
export const useRunDocument = () => {
|
||||||
const dispatch = useDispatch();
|
const dispatch = useDispatch();
|
||||||
|
|
||||||
|
@ -81,6 +81,7 @@ export default {
|
|||||||
searchFiles: 'Search your files',
|
searchFiles: 'Search your files',
|
||||||
localFiles: 'Local files',
|
localFiles: 'Local files',
|
||||||
emptyFiles: 'Create empty file',
|
emptyFiles: 'Create empty file',
|
||||||
|
webCrawl: 'Web Crawl',
|
||||||
chunkNumber: 'Chunk Number',
|
chunkNumber: 'Chunk Number',
|
||||||
uploadDate: 'Upload Date',
|
uploadDate: 'Upload Date',
|
||||||
chunkMethod: 'Chunk Method',
|
chunkMethod: 'Chunk Method',
|
||||||
|
@ -80,6 +80,7 @@ export default {
|
|||||||
searchFiles: '搜索文件',
|
searchFiles: '搜索文件',
|
||||||
localFiles: '本地文件',
|
localFiles: '本地文件',
|
||||||
emptyFiles: '新建空文件',
|
emptyFiles: '新建空文件',
|
||||||
|
webCrawl: '網頁抓取',
|
||||||
chunkNumber: '分塊數',
|
chunkNumber: '分塊數',
|
||||||
uploadDate: '上傳日期',
|
uploadDate: '上傳日期',
|
||||||
chunkMethod: '解析方法',
|
chunkMethod: '解析方法',
|
||||||
|
@ -80,6 +80,7 @@ export default {
|
|||||||
searchFiles: '搜索文件',
|
searchFiles: '搜索文件',
|
||||||
localFiles: '本地文件',
|
localFiles: '本地文件',
|
||||||
emptyFiles: '新建空文件',
|
emptyFiles: '新建空文件',
|
||||||
|
webCrawl: '网页抓取',
|
||||||
chunkNumber: '分块数',
|
chunkNumber: '分块数',
|
||||||
uploadDate: '上传日期',
|
uploadDate: '上传日期',
|
||||||
chunkMethod: '解析方法',
|
chunkMethod: '解析方法',
|
||||||
|
@ -29,13 +29,15 @@ import styles from './index.less';
|
|||||||
interface IProps {
|
interface IProps {
|
||||||
selectedRowKeys: string[];
|
selectedRowKeys: string[];
|
||||||
showCreateModal(): void;
|
showCreateModal(): void;
|
||||||
|
showWebCrawlModal(): void;
|
||||||
showDocumentUploadModal(): void;
|
showDocumentUploadModal(): void;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DocumentToolbar = ({
|
const DocumentToolbar = ({
|
||||||
selectedRowKeys,
|
selectedRowKeys,
|
||||||
showCreateModal,
|
showCreateModal,
|
||||||
showDocumentUploadModal,
|
showWebCrawlModal,
|
||||||
|
showDocumentUploadModal,
|
||||||
}: IProps) => {
|
}: IProps) => {
|
||||||
const { t } = useTranslate('knowledgeDetails');
|
const { t } = useTranslate('knowledgeDetails');
|
||||||
const { fetchDocumentList } = useFetchDocumentListOnMount();
|
const { fetchDocumentList } = useFetchDocumentListOnMount();
|
||||||
@ -66,6 +68,19 @@ const DocumentToolbar = ({
|
|||||||
{ type: 'divider' },
|
{ type: 'divider' },
|
||||||
{
|
{
|
||||||
key: '2',
|
key: '2',
|
||||||
|
onClick: showWebCrawlModal,
|
||||||
|
label: (
|
||||||
|
<div>
|
||||||
|
<Button type="link">
|
||||||
|
<FileTextOutlined />
|
||||||
|
{t('webCrawl')}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{ type: 'divider' },
|
||||||
|
{
|
||||||
|
key: '3',
|
||||||
onClick: showCreateModal,
|
onClick: showCreateModal,
|
||||||
label: (
|
label: (
|
||||||
<div>
|
<div>
|
||||||
@ -77,7 +92,7 @@ const DocumentToolbar = ({
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
}, [showDocumentUploadModal, showCreateModal, t]);
|
}, [showDocumentUploadModal, showWebCrawlModal, showCreateModal, t]);
|
||||||
|
|
||||||
const handleDelete = useCallback(() => {
|
const handleDelete = useCallback(() => {
|
||||||
showDeleteConfirm({
|
showDeleteConfirm({
|
||||||
|
@ -7,6 +7,7 @@ import {
|
|||||||
useSelectRunDocumentLoading,
|
useSelectRunDocumentLoading,
|
||||||
useSetDocumentParser,
|
useSetDocumentParser,
|
||||||
useUploadDocument,
|
useUploadDocument,
|
||||||
|
useWebCrawl,
|
||||||
} from '@/hooks/documentHooks';
|
} from '@/hooks/documentHooks';
|
||||||
import { useGetKnowledgeSearchParams } from '@/hooks/routeHook';
|
import { useGetKnowledgeSearchParams } from '@/hooks/routeHook';
|
||||||
import { useOneNamespaceEffectsLoading } from '@/hooks/storeHooks';
|
import { useOneNamespaceEffectsLoading } from '@/hooks/storeHooks';
|
||||||
@ -286,6 +287,37 @@ export const useHandleUploadDocument = () => {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Bundles the state for the web-crawl dialog: its visibility controls, the
 * submit handler and the loading flag of the `kFModel/web_crawl` effect.
 *
 * `onWebCrawlUploadOk` resolves to 0 (and closes the dialog) when the crawl
 * reports return code 0, and to -1 otherwise.
 */
export const useHandleWebCrawl = () => {
  const { visible, hideModal, showModal } = useSetModalState();
  const crawl = useWebCrawl();

  const onWebCrawlUploadOk = useCallback(
    async (name: string, url: string) => {
      const retcode = await crawl(name, url);
      if (retcode !== 0) {
        return -1;
      }
      hideModal();
      return 0;
    },
    [crawl, hideModal],
  );

  const webCrawlUploadLoading = useOneNamespaceEffectsLoading('kFModel', [
    'web_crawl',
  ]);

  return {
    webCrawlUploadLoading,
    onWebCrawlUploadOk,
    webCrawlUploadVisible: visible,
    hideWebCrawlUploadModal: hideModal,
    showWebCrawlUploadModal: showModal,
  };
};
|
||||||
|
|
||||||
export const useHandleRunDocumentByIds = (id: string) => {
|
export const useHandleRunDocumentByIds = (id: string) => {
|
||||||
const loading = useSelectRunDocumentLoading();
|
const loading = useSelectRunDocumentLoading();
|
||||||
const runDocumentByIds = useRunDocument();
|
const runDocumentByIds = useRunDocument();
|
||||||
|
@ -12,6 +12,7 @@ import { Divider, Flex, Switch, Table, Typography } from 'antd';
|
|||||||
import type { ColumnsType } from 'antd/es/table';
|
import type { ColumnsType } from 'antd/es/table';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
import CreateFileModal from './create-file-modal';
|
import CreateFileModal from './create-file-modal';
|
||||||
|
import WebCrawlModal from './web-crawl-modal';
|
||||||
import DocumentToolbar from './document-toolbar';
|
import DocumentToolbar from './document-toolbar';
|
||||||
import {
|
import {
|
||||||
useChangeDocumentParser,
|
useChangeDocumentParser,
|
||||||
@ -19,7 +20,7 @@ import {
|
|||||||
useFetchDocumentListOnMount,
|
useFetchDocumentListOnMount,
|
||||||
useGetPagination,
|
useGetPagination,
|
||||||
useGetRowSelection,
|
useGetRowSelection,
|
||||||
useHandleUploadDocument,
|
useHandleUploadDocument, useHandleWebCrawl,
|
||||||
useNavigateToOtherPage,
|
useNavigateToOtherPage,
|
||||||
useRenameDocument,
|
useRenameDocument,
|
||||||
} from './hooks';
|
} from './hooks';
|
||||||
@ -69,6 +70,13 @@ const KnowledgeFile = () => {
|
|||||||
onDocumentUploadOk,
|
onDocumentUploadOk,
|
||||||
documentUploadLoading,
|
documentUploadLoading,
|
||||||
} = useHandleUploadDocument();
|
} = useHandleUploadDocument();
|
||||||
|
const {
|
||||||
|
webCrawlUploadVisible,
|
||||||
|
hideWebCrawlUploadModal,
|
||||||
|
showWebCrawlUploadModal,
|
||||||
|
onWebCrawlUploadOk,
|
||||||
|
webCrawlUploadLoading,
|
||||||
|
} = useHandleWebCrawl();
|
||||||
const { t } = useTranslation('translation', {
|
const { t } = useTranslation('translation', {
|
||||||
keyPrefix: 'knowledgeDetails',
|
keyPrefix: 'knowledgeDetails',
|
||||||
});
|
});
|
||||||
@ -170,6 +178,7 @@ const KnowledgeFile = () => {
|
|||||||
<DocumentToolbar
|
<DocumentToolbar
|
||||||
selectedRowKeys={rowSelection.selectedRowKeys as string[]}
|
selectedRowKeys={rowSelection.selectedRowKeys as string[]}
|
||||||
showCreateModal={showCreateModal}
|
showCreateModal={showCreateModal}
|
||||||
|
showWebCrawlModal={showWebCrawlUploadModal}
|
||||||
showDocumentUploadModal={showDocumentUploadModal}
|
showDocumentUploadModal={showDocumentUploadModal}
|
||||||
></DocumentToolbar>
|
></DocumentToolbar>
|
||||||
<Table
|
<Table
|
||||||
@ -211,6 +220,12 @@ const KnowledgeFile = () => {
|
|||||||
loading={documentUploadLoading}
|
loading={documentUploadLoading}
|
||||||
onOk={onDocumentUploadOk}
|
onOk={onDocumentUploadOk}
|
||||||
></FileUploadModal>
|
></FileUploadModal>
|
||||||
|
<WebCrawlModal
|
||||||
|
visible={webCrawlUploadVisible}
|
||||||
|
hideModal={hideWebCrawlUploadModal}
|
||||||
|
loading={webCrawlUploadLoading}
|
||||||
|
onOk={onWebCrawlUploadOk}
|
||||||
|
></WebCrawlModal>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
@ -232,6 +232,27 @@ const model: DvaModel<KFModelState> = {
|
|||||||
}
|
}
|
||||||
return data;
|
return data;
|
||||||
},
|
},
|
||||||
|
// Submits name/url/kb_id as form data to the web_crawl service, shows a
// success message on return code 0, reloads the document list on return
// code 0 or 500, and returns the return code to the dispatcher.
*web_crawl({ payload = {} }, { call, put }) {
  const formData = new FormData();
  for (const field of ['name', 'url', 'kb_id']) {
    formData.append(field, payload[field]);
  }

  const { data } = yield call(kbService.web_crawl, formData);
  const { retcode } = data;

  if (retcode === 0) {
    message.success(i18n.t('message.uploaded'));
  }
  if (retcode === 0 || retcode === 500) {
    yield put({
      type: 'getKfList',
      payload: { kb_id: payload.kb_id },
    });
  }
  return retcode;
},
|
||||||
},
|
},
|
||||||
subscriptions: {
|
subscriptions: {
|
||||||
setup({ dispatch, history }) {
|
setup({ dispatch, history }) {
|
||||||
|
@ -0,0 +1,54 @@
|
|||||||
|
import { IModalManagerChildrenProps } from '@/components/modal-manager';
|
||||||
|
import { Form, Input, Modal } from 'antd';
|
||||||
|
import React from 'react';
|
||||||
|
import {useTranslate} from "@/hooks/commonHooks";
|
||||||
|
|
||||||
|
|
||||||
|
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
  // True while the crawl request is in flight.
  loading: boolean;
  // Called with the validated form values when the user confirms.
  onOk: (name: string, url: string) => void;
  showModal?(): void;
}

// Must match the whole input, otherwise any text that merely contains a URL
// passes. Only web schemes are offered; file:// is not a web page.
const URL_PATTERN = new RegExp(
  '^(https?|ftp)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]$',
);

/**
 * Dialog asking for a document name and a page URL; on confirm the validated
 * values are handed to `onOk`.
 */
const WebCrawlModal: React.FC<IProps> = ({
  visible,
  hideModal,
  loading,
  onOk,
}) => {
  const [form] = Form.useForm();
  const { t } = useTranslate('knowledgeDetails');

  const handleOk = async () => {
    const values = await form.validateFields();
    onOk(values.name, values.url);
  };

  return (
    <Modal
      title={t('webCrawl')}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      // Shows progress on the OK button and blocks a second submit while the
      // crawl is running.
      confirmLoading={loading}
    >
      <Form
        form={form}
        name="validateOnly"
        labelCol={{ span: 4 }}
        wrapperCol={{ span: 20 }}
        style={{ maxWidth: 600 }}
        autoComplete="off"
      >
        <Form.Item
          label="Name"
          name="name"
          rules={[
            { required: true, message: 'Please input name!' },
            // Limit now agrees with the message shown to the user.
            {
              max: 128,
              message: 'The maximum length of name is 128 characters',
            },
          ]}
        >
          <Input placeholder="Document name" />
        </Form.Item>
        <Form.Item
          label="URL"
          name="url"
          rules={[
            { required: true, message: 'Please input url!' },
            { pattern: URL_PATTERN, message: 'Please enter a valid URL!' },
          ]}
        >
          <Input placeholder="https://www.baidu.com" />
        </Form.Item>
      </Form>
    </Modal>
  );
};
|
||||||
|
export default WebCrawlModal;
|
@ -26,6 +26,7 @@ const {
|
|||||||
document_run,
|
document_run,
|
||||||
get_document_file,
|
get_document_file,
|
||||||
document_upload,
|
document_upload,
|
||||||
|
web_crawl,
|
||||||
} = api;
|
} = api;
|
||||||
|
|
||||||
const methods = {
|
const methods = {
|
||||||
@ -87,6 +88,10 @@ const methods = {
|
|||||||
url: document_upload,
|
url: document_upload,
|
||||||
method: 'post',
|
method: 'post',
|
||||||
},
|
},
|
||||||
|
web_crawl: {
|
||||||
|
url: web_crawl,
|
||||||
|
method: 'post',
|
||||||
|
},
|
||||||
// chunk管理
|
// chunk管理
|
||||||
chunk_list: {
|
chunk_list: {
|
||||||
url: chunk_list,
|
url: chunk_list,
|
||||||
|
@ -48,6 +48,7 @@ export default {
|
|||||||
document_thumbnails: `${api_host}/document/thumbnails`,
|
document_thumbnails: `${api_host}/document/thumbnails`,
|
||||||
get_document_file: `${api_host}/document/get`,
|
get_document_file: `${api_host}/document/get`,
|
||||||
document_upload: `${api_host}/document/upload`,
|
document_upload: `${api_host}/document/upload`,
|
||||||
|
web_crawl: `${api_host}/document/web_crawl`,
|
||||||
|
|
||||||
// chat
|
// chat
|
||||||
setDialog: `${api_host}/dialog/set`,
|
setDialog: `${api_host}/dialog/set`,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user