Feature/mutil embedding model (#908)

Co-authored-by: JzoNg <jzongcode@gmail.com>
Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>

commit db7156dafd (parent 4420281d96)
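This commit makes the embedding model a per-dataset setting rather than a tenant-wide default: the datasets table gains embedding_model and embedding_model_provider columns, documents gain doc_language, and every call site that resolves an embedding model now passes the dataset's own configuration. The recurring pattern, as a sketch assembled from the hunks below (surrounding code elided):

    # Resolve the embedding model recorded on the dataset instead of the tenant
    # default; LLMBadRequestError / ProviderTokenNotInitError surface when the
    # recorded provider is no longer configured.
    embedding_model = ModelFactory.get_embedding_model(
        tenant_id=dataset.tenant_id,
        model_provider_name=dataset.embedding_model_provider,  # e.g. 'openai'
        model_name=dataset.embedding_model                     # e.g. 'text-embedding-ada-002'
    )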
@@ -10,13 +10,15 @@ from controllers.console.datasets.error import DatasetNameDuplicateError
 from controllers.console.setup import setup_required
 from controllers.console.wraps import account_initialization_required
 from core.indexing_runner import IndexingRunner
-from core.model_providers.error import LLMBadRequestError
+from core.model_providers.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.model_providers.model_factory import ModelFactory
+from core.model_providers.models.entity.model_params import ModelType
 from libs.helper import TimestampField
 from extensions.ext_database import db
 from models.dataset import DocumentSegment, Document
 from models.model import UploadFile
 from services.dataset_service import DatasetService, DocumentService
+from services.provider_service import ProviderService

 dataset_detail_fields = {
     'id': fields.String,
@@ -33,6 +35,9 @@ dataset_detail_fields = {
     'created_at': TimestampField,
     'updated_by': fields.String,
     'updated_at': TimestampField,
+    'embedding_model': fields.String,
+    'embedding_model_provider': fields.String,
+    'embedding_available': fields.Boolean
 }

 dataset_query_detail_fields = {
@@ -74,8 +79,22 @@ class DatasetListApi(Resource):
         datasets, total = DatasetService.get_datasets(page, limit, provider,
                                                       current_user.current_tenant_id, current_user)

+        # check embedding setting
+        provider_service = ProviderService()
+        valid_model_list = provider_service.get_valid_model_list(current_user.current_tenant_id, ModelType.EMBEDDINGS.value)
+        # if len(valid_model_list) == 0:
+        #     raise ProviderNotInitializeError(
+        #         f"No Embedding Model available. Please configure a valid provider "
+        #         f"in the Settings -> Model Provider.")
+        model_names = [item['model_name'] for item in valid_model_list]
+        data = marshal(datasets, dataset_detail_fields)
+        for item in data:
+            if item['embedding_model'] in model_names:
+                item['embedding_available'] = True
+            else:
+                item['embedding_available'] = False
         response = {
-            'data': marshal(datasets, dataset_detail_fields),
+            'data': data,
             'has_more': len(datasets) == limit,
             'limit': limit,
             'total': total,
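With the fields above, each item in the dataset list now reports whether its recorded embedding model is still usable by the tenant. A hypothetical response item (values illustrative, not from this diff):

    {
        "id": "a1b2c3d4-...",
        "embedding_model": "text-embedding-ada-002",
        "embedding_model_provider": "openai",
        "embedding_available": true
    }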
@@ -99,7 +118,6 @@ class DatasetListApi(Resource):
         # The role of the current user in the ta table must be admin or owner
         if current_user.current_tenant.current_role not in ['admin', 'owner']:
             raise Forbidden()

         try:
             ModelFactory.get_embedding_model(
                 tenant_id=current_user.current_tenant_id
@@ -233,6 +251,8 @@ class DatasetIndexingEstimateApi(Resource):
         parser.add_argument('info_list', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
+        parser.add_argument('dataset_id', type=str, required=False, nullable=False, location='json')
+        parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, location='json')
         args = parser.parse_args()
         # validate args
         DocumentService.estimate_args_validate(args)
@@ -250,11 +270,14 @@ class DatasetIndexingEstimateApi(Resource):

            try:
                response = indexing_runner.file_indexing_estimate(current_user.current_tenant_id, file_details,
-                                                                  args['process_rule'], args['doc_form'])
+                                                                  args['process_rule'], args['doc_form'],
+                                                                  args['doc_language'], args['dataset_id'])
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    f"No Embedding Model available. Please configure a valid provider "
                    f"in the Settings -> Model Provider.")
+           except ProviderTokenNotInitError as ex:
+               raise ProviderNotInitializeError(ex.description)
        elif args['info_list']['data_source_type'] == 'notion_import':

            indexing_runner = IndexingRunner()
@@ -262,11 +285,14 @@ class DatasetIndexingEstimateApi(Resource):
            try:
                response = indexing_runner.notion_indexing_estimate(current_user.current_tenant_id,
                                                                    args['info_list']['notion_info_list'],
-                                                                   args['process_rule'], args['doc_form'])
+                                                                   args['process_rule'], args['doc_form'],
+                                                                   args['doc_language'], args['dataset_id'])
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    f"No Embedding Model available. Please configure a valid provider "
                    f"in the Settings -> Model Provider.")
+           except ProviderTokenNotInitError as ex:
+               raise ProviderNotInitializeError(ex.description)
        else:
            raise ValueError('Data source type not support')
        return response, 200
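Both estimate paths now thread the document language and the owning dataset through to the runner, so estimates are priced against the dataset's own embedding model. The new call shape, as a sketch (argument values are placeholders):

    response = indexing_runner.file_indexing_estimate(
        current_user.current_tenant_id,
        file_details,
        args['process_rule'],
        args['doc_form'],      # 'text_model' or 'qa_model'
        args['doc_language'],  # defaults to 'English'
        args['dataset_id']     # may be None; the tenant default model is then used
    )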
@@ -274,6 +274,7 @@ class DatasetDocumentListApi(Resource):
         parser.add_argument('duplicate', type=bool, nullable=False, location='json')
         parser.add_argument('original_document_id', type=str, required=False, location='json')
         parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
+        parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, location='json')
         args = parser.parse_args()

         if not dataset.indexing_technique and not args['indexing_technique']:
@@ -282,14 +283,19 @@ class DatasetDocumentListApi(Resource):
         # validate args
         DocumentService.document_create_args_validate(args)

+        # check embedding model setting
         try:
             ModelFactory.get_embedding_model(
-                tenant_id=current_user.current_tenant_id
+                tenant_id=current_user.current_tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
             )
         except LLMBadRequestError:
             raise ProviderNotInitializeError(
                 f"No Embedding Model available. Please configure a valid provider "
                 f"in the Settings -> Model Provider.")
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)

         try:
             documents, batch = DocumentService.save_document_with_dataset_id(dataset, args, current_user)
@@ -328,6 +334,7 @@ class DatasetInitApi(Resource):
         parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
         parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
+        parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False, location='json')
         args = parser.parse_args()

         try:
@@ -406,11 +413,13 @@ class DocumentIndexingEstimateApi(DocumentResource):

            try:
                response = indexing_runner.file_indexing_estimate(current_user.current_tenant_id, [file],
-                                                                  data_process_rule_dict)
+                                                                  data_process_rule_dict, None, dataset_id)
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    f"No Embedding Model available. Please configure a valid provider "
                    f"in the Settings -> Model Provider.")
+           except ProviderTokenNotInitError as ex:
+               raise ProviderNotInitializeError(ex.description)

        return response

@@ -473,22 +482,27 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
            indexing_runner = IndexingRunner()
            try:
                response = indexing_runner.file_indexing_estimate(current_user.current_tenant_id, file_details,
-                                                                  data_process_rule_dict)
+                                                                  data_process_rule_dict, None, dataset_id)
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    f"No Embedding Model available. Please configure a valid provider "
                    f"in the Settings -> Model Provider.")
-       elif dataset.data_source_type:
+           except ProviderTokenNotInitError as ex:
+               raise ProviderNotInitializeError(ex.description)
+       elif dataset.data_source_type == 'notion_import':

            indexing_runner = IndexingRunner()
            try:
                response = indexing_runner.notion_indexing_estimate(current_user.current_tenant_id,
                                                                    info_list,
-                                                                   data_process_rule_dict)
+                                                                   data_process_rule_dict,
+                                                                   None, dataset_id)
            except LLMBadRequestError:
                raise ProviderNotInitializeError(
                    f"No Embedding Model available. Please configure a valid provider "
                    f"in the Settings -> Model Provider.")
+           except ProviderTokenNotInitError as ex:
+               raise ProviderNotInitializeError(ex.description)
        else:
            raise ValueError('Data source type not support')
        return response
@@ -575,7 +589,8 @@ class DocumentIndexingStatusApi(DocumentResource):

         document.completed_segments = completed_segments
         document.total_segments = total_segments
+        if document.is_paused:
+            document.indexing_status = 'paused'
         return marshal(document, self.document_status_fields)


@@ -832,6 +847,22 @@ class DocumentStatusApi(DocumentResource):

            remove_document_from_index_task.delay(document_id)

+           return {'result': 'success'}, 200
+       elif action == "un_archive":
+           if not document.archived:
+               raise InvalidActionError('Document is not archived.')
+
+           document.archived = False
+           document.archived_at = None
+           document.archived_by = None
+           document.updated_at = datetime.utcnow()
+           db.session.commit()
+
+           # Set cache to prevent indexing the same document multiple times
+           redis_client.setex(indexing_cache_key, 600, 1)
+
+           add_document_to_index_task.delay(document_id)
+
            return {'result': 'success'}, 200
        else:
            raise InvalidActionError()
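Document-creation requests gain an optional doc_language alongside doc_form. A hypothetical request body (structure illustrative; unrelated fields elided):

    {
        "doc_form": "qa_model",
        "doc_language": "English",
        "process_rule": {...},
        "data_source": {...}
    }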
@@ -1,15 +1,20 @@
 # -*- coding:utf-8 -*-
+import uuid
 from datetime import datetime

+from flask import request
 from flask_login import login_required, current_user
 from flask_restful import Resource, reqparse, fields, marshal
 from werkzeug.exceptions import NotFound, Forbidden

 import services
 from controllers.console import api
-from controllers.console.datasets.error import InvalidActionError
+from controllers.console.app.error import ProviderNotInitializeError
+from controllers.console.datasets.error import InvalidActionError, NoFileUploadedError, TooManyFilesError
 from controllers.console.setup import setup_required
 from controllers.console.wraps import account_initialization_required
+from core.model_providers.error import LLMBadRequestError, ProviderTokenNotInitError
+from core.model_providers.model_factory import ModelFactory
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.dataset import DocumentSegment
@@ -17,7 +22,9 @@ from models.dataset import DocumentSegment
 from libs.helper import TimestampField
 from services.dataset_service import DatasetService, DocumentService, SegmentService
 from tasks.enable_segment_to_index_task import enable_segment_to_index_task
-from tasks.remove_segment_from_index_task import remove_segment_from_index_task
+from tasks.disable_segment_from_index_task import disable_segment_from_index_task
+from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
+import pandas as pd

 segment_fields = {
     'id': fields.String,
@@ -152,6 +159,20 @@ class DatasetDocumentSegmentApi(Resource):
         except services.errors.account.NoPermissionError as e:
             raise Forbidden(str(e))

+        # check embedding model setting
+        try:
+            ModelFactory.get_embedding_model(
+                tenant_id=current_user.current_tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        except LLMBadRequestError:
+            raise ProviderNotInitializeError(
+                f"No Embedding Model available. Please configure a valid provider "
+                f"in the Settings -> Model Provider.")
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+
         segment = DocumentSegment.query.filter(
             DocumentSegment.id == str(segment_id),
             DocumentSegment.tenant_id == current_user.current_tenant_id
@@ -197,7 +218,7 @@ class DatasetDocumentSegmentApi(Resource):
            # Set cache to prevent indexing the same segment multiple times
            redis_client.setex(indexing_cache_key, 600, 1)

-           remove_segment_from_index_task.delay(segment.id)
+           disable_segment_from_index_task.delay(segment.id)

            return {'result': 'success'}, 200
        else:
@@ -222,6 +243,19 @@ class DatasetDocumentSegmentAddApi(Resource):
         # The role of the current user in the ta table must be admin or owner
         if current_user.current_tenant.current_role not in ['admin', 'owner']:
             raise Forbidden()
+        # check embedding model setting
+        try:
+            ModelFactory.get_embedding_model(
+                tenant_id=current_user.current_tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        except LLMBadRequestError:
+            raise ProviderNotInitializeError(
+                f"No Embedding Model available. Please configure a valid provider "
+                f"in the Settings -> Model Provider.")
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
         try:
             DatasetService.check_dataset_permission(dataset, current_user)
         except services.errors.account.NoPermissionError as e:
@@ -233,7 +267,7 @@ class DatasetDocumentSegmentAddApi(Resource):
         parser.add_argument('keywords', type=list, required=False, nullable=True, location='json')
         args = parser.parse_args()
         SegmentService.segment_create_args_validate(args, document)
-        segment = SegmentService.create_segment(args, document)
+        segment = SegmentService.create_segment(args, document, dataset)
         return {
             'data': marshal(segment, segment_fields),
             'doc_form': document.doc_form
@@ -245,6 +279,61 @@ class DatasetDocumentSegmentUpdateApi(Resource):
     @login_required
     @account_initialization_required
     def patch(self, dataset_id, document_id, segment_id):
+        # check dataset
+        dataset_id = str(dataset_id)
+        dataset = DatasetService.get_dataset(dataset_id)
+        if not dataset:
+            raise NotFound('Dataset not found.')
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset_id, document_id)
+        if not document:
+            raise NotFound('Document not found.')
+        # check embedding model setting
+        try:
+            ModelFactory.get_embedding_model(
+                tenant_id=current_user.current_tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        except LLMBadRequestError:
+            raise ProviderNotInitializeError(
+                f"No Embedding Model available. Please configure a valid provider "
+                f"in the Settings -> Model Provider.")
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        # check segment
+        segment_id = str(segment_id)
+        segment = DocumentSegment.query.filter(
+            DocumentSegment.id == str(segment_id),
+            DocumentSegment.tenant_id == current_user.current_tenant_id
+        ).first()
+        if not segment:
+            raise NotFound('Segment not found.')
+        # The role of the current user in the ta table must be admin or owner
+        if current_user.current_tenant.current_role not in ['admin', 'owner']:
+            raise Forbidden()
+        try:
+            DatasetService.check_dataset_permission(dataset, current_user)
+        except services.errors.account.NoPermissionError as e:
+            raise Forbidden(str(e))
+        # validate args
+        parser = reqparse.RequestParser()
+        parser.add_argument('content', type=str, required=True, nullable=False, location='json')
+        parser.add_argument('answer', type=str, required=False, nullable=True, location='json')
+        parser.add_argument('keywords', type=list, required=False, nullable=True, location='json')
+        args = parser.parse_args()
+        SegmentService.segment_create_args_validate(args, document)
+        segment = SegmentService.update_segment(args, segment, document, dataset)
+        return {
+            'data': marshal(segment, segment_fields),
+            'doc_form': document.doc_form
+        }, 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def delete(self, dataset_id, document_id, segment_id):
         # check dataset
         dataset_id = str(dataset_id)
         dataset = DatasetService.get_dataset(dataset_id)
@@ -270,17 +359,88 @@ class DatasetDocumentSegmentUpdateApi(Resource):
             DatasetService.check_dataset_permission(dataset, current_user)
         except services.errors.account.NoPermissionError as e:
             raise Forbidden(str(e))
-        # validate args
-        parser = reqparse.RequestParser()
-        parser.add_argument('content', type=str, required=True, nullable=False, location='json')
-        parser.add_argument('answer', type=str, required=False, nullable=True, location='json')
-        parser.add_argument('keywords', type=list, required=False, nullable=True, location='json')
-        args = parser.parse_args()
-        SegmentService.segment_create_args_validate(args, document)
-        segment = SegmentService.update_segment(args, segment, document)
+        SegmentService.delete_segment(segment, document, dataset)
+        return {'result': 'success'}, 200
+
+
+class DatasetDocumentSegmentBatchImportApi(Resource):
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def post(self, dataset_id, document_id):
+        # check dataset
+        dataset_id = str(dataset_id)
+        dataset = DatasetService.get_dataset(dataset_id)
+        if not dataset:
+            raise NotFound('Dataset not found.')
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset_id, document_id)
+        if not document:
+            raise NotFound('Document not found.')
+        try:
+            ModelFactory.get_embedding_model(
+                tenant_id=current_user.current_tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        except LLMBadRequestError:
+            raise ProviderNotInitializeError(
+                f"No Embedding Model available. Please configure a valid provider "
+                f"in the Settings -> Model Provider.")
+        except ProviderTokenNotInitError as ex:
+            raise ProviderNotInitializeError(ex.description)
+        # get file from request
+        file = request.files['file']
+        # check file
+        if 'file' not in request.files:
+            raise NoFileUploadedError()
+
+        if len(request.files) > 1:
+            raise TooManyFilesError()
+        # check file type
+        if not file.filename.endswith('.csv'):
+            raise ValueError("Invalid file type. Only CSV files are allowed")
+
+        try:
+            # Skip the first row
+            df = pd.read_csv(file)
+            result = []
+            for index, row in df.iterrows():
+                if document.doc_form == 'qa_model':
+                    data = {'content': row[0], 'answer': row[1]}
+                else:
+                    data = {'content': row[0]}
+                result.append(data)
+            if len(result) == 0:
+                raise ValueError("The CSV file is empty.")
+            # async job
+            job_id = str(uuid.uuid4())
+            indexing_cache_key = 'segment_batch_import_{}'.format(str(job_id))
+            # send batch add segments task
+            redis_client.setnx(indexing_cache_key, 'waiting')
+            batch_create_segment_to_index_task.delay(str(job_id), result, dataset_id, document_id,
+                                                     current_user.current_tenant_id, current_user.id)
+        except Exception as e:
+            return {'error': str(e)}, 500
         return {
-            'data': marshal(segment, segment_fields),
-            'doc_form': document.doc_form
+            'job_id': job_id,
+            'job_status': 'waiting'
+        }, 200
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    def get(self, job_id):
+        job_id = str(job_id)
+        indexing_cache_key = 'segment_batch_import_{}'.format(job_id)
+        cache_result = redis_client.get(indexing_cache_key)
+        if cache_result is None:
+            raise ValueError("The job is not exist.")
+
+        return {
+            'job_id': job_id,
+            'job_status': cache_result.decode()
         }, 200
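The batch import endpoint reads the upload with pandas, treating the first row as a header; column one is the segment content and, for qa_model documents, column two is the answer. A minimal CSV that would satisfy this parser (contents illustrative):

    content,answer
    What does this feature add?,Per-dataset embedding model selection.
    Which column is required?,Only the first; the answer column applies to qa_model documents.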
@@ -292,3 +452,6 @@ api.add_resource(DatasetDocumentSegmentAddApi,
                  '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment')
 api.add_resource(DatasetDocumentSegmentUpdateApi,
                  '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>')
+api.add_resource(DatasetDocumentSegmentBatchImportApi,
+                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/batch_import',
+                 '/datasets/batch_import_status/<uuid:job_id>')
@@ -11,7 +11,8 @@ from controllers.console.app.error import ProviderNotInitializeError, ProviderQu
 from controllers.console.datasets.error import HighQualityDatasetOnlyError, DatasetNotInitializedError
 from controllers.console.setup import setup_required
 from controllers.console.wraps import account_initialization_required
-from core.model_providers.error import ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
+from core.model_providers.error import ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError, \
+    LLMBadRequestError
 from libs.helper import TimestampField
 from services.dataset_service import DatasetService
 from services.hit_testing_service import HitTestingService
@@ -102,6 +103,10 @@ class HitTestingApi(Resource):
             raise ProviderQuotaExceededError()
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
+        except LLMBadRequestError:
+            raise ProviderNotInitializeError(
+                f"No Embedding Model available. Please configure a valid provider "
+                f"in the Settings -> Model Provider.")
         except ValueError as e:
             raise ValueError(str(e))
         except Exception as e:
@@ -69,7 +69,9 @@ class DatesetDocumentStore:
         max_position = 0

         embedding_model = ModelFactory.get_embedding_model(
-            tenant_id=self._dataset.tenant_id
+            tenant_id=self._dataset.tenant_id,
+            model_provider_name=self._dataset.embedding_model_provider,
+            model_name=self._dataset.embedding_model
         )

         for doc in docs:
@@ -179,8 +179,8 @@ class LLMGenerator:
         return rule_config

     @classmethod
-    def generate_qa_document(cls, tenant_id: str, query):
-        prompt = GENERATOR_QA_PROMPT
+    def generate_qa_document(cls, tenant_id: str, query, document_language: str):
+        prompt = GENERATOR_QA_PROMPT.format(language=document_language)

         model_instance = ModelFactory.get_text_generation_model(
             tenant_id=tenant_id,
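The generator now interpolates the target language into the prompt before calling the text-generation model. A sketch of the updated call (the tenant id is a placeholder):

    response = LLMGenerator.generate_qa_document(
        'tenant-uuid',               # hypothetical tenant id
        document_node.page_content,  # text to convert into Q&A pairs
        'English'                    # substituted for {language} in GENERATOR_QA_PROMPT
    )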
@@ -15,7 +15,9 @@ class IndexBuilder:
             return None

         embedding_model = ModelFactory.get_embedding_model(
-            tenant_id=dataset.tenant_id
+            tenant_id=dataset.tenant_id,
+            model_provider_name=dataset.embedding_model_provider,
+            model_name=dataset.embedding_model
         )

         embeddings = CacheEmbedding(embedding_model)
@@ -67,14 +67,6 @@ class IndexingRunner:
             dataset_document=dataset_document,
             processing_rule=processing_rule
         )
-        # new_documents = []
-        # for document in documents:
-        #     response = LLMGenerator.generate_qa_document(dataset.tenant_id, document.page_content)
-        #     document_qa_list = self.format_split_text(response)
-        #     for result in document_qa_list:
-        #         document = Document(page_content=result['question'], metadata={'source': result['answer']})
-        #         new_documents.append(document)
-        # build index
         self._build_index(
             dataset=dataset,
             dataset_document=dataset_document,
@@ -225,14 +217,25 @@ class IndexingRunner:
         db.session.commit()

     def file_indexing_estimate(self, tenant_id: str, file_details: List[UploadFile], tmp_processing_rule: dict,
-                               doc_form: str = None) -> dict:
+                               doc_form: str = None, doc_language: str = 'English', dataset_id: str = None) -> dict:
         """
         Estimate the indexing for the document.
         """
+        if dataset_id:
+            dataset = Dataset.query.filter_by(
+                id=dataset_id
+            ).first()
+            if not dataset:
+                raise ValueError('Dataset not found.')
+            embedding_model = ModelFactory.get_embedding_model(
+                tenant_id=dataset.tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        else:
             embedding_model = ModelFactory.get_embedding_model(
                 tenant_id=tenant_id
             )

         tokens = 0
         preview_texts = []
         total_segments = 0
@@ -263,14 +266,13 @@ class IndexingRunner:

             tokens += embedding_model.get_num_tokens(self.filter_string(document.page_content))

+        if doc_form and doc_form == 'qa_model':
             text_generation_model = ModelFactory.get_text_generation_model(
                 tenant_id=tenant_id
             )

-        if doc_form and doc_form == 'qa_model':
             if len(preview_texts) > 0:
                 # qa model document
-                response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0])
+                response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0], doc_language)
                 document_qa_list = self.format_split_text(response)
                 return {
                     "total_segments": total_segments * 20,
@@ -289,10 +291,23 @@ class IndexingRunner:
                     "preview": preview_texts
                 }

-    def notion_indexing_estimate(self, tenant_id: str, notion_info_list: list, tmp_processing_rule: dict, doc_form: str = None) -> dict:
+    def notion_indexing_estimate(self, tenant_id: str, notion_info_list: list, tmp_processing_rule: dict,
+                                 doc_form: str = None, doc_language: str = 'English', dataset_id: str = None) -> dict:
         """
         Estimate the indexing for the document.
         """
+        if dataset_id:
+            dataset = Dataset.query.filter_by(
+                id=dataset_id
+            ).first()
+            if not dataset:
+                raise ValueError('Dataset not found.')
+            embedding_model = ModelFactory.get_embedding_model(
+                tenant_id=dataset.tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+        else:
             embedding_model = ModelFactory.get_embedding_model(
                 tenant_id=tenant_id
             )
@@ -344,14 +359,13 @@ class IndexingRunner:

             tokens += embedding_model.get_num_tokens(document.page_content)

+        if doc_form and doc_form == 'qa_model':
             text_generation_model = ModelFactory.get_text_generation_model(
                 tenant_id=tenant_id
             )

-        if doc_form and doc_form == 'qa_model':
             if len(preview_texts) > 0:
                 # qa model document
-                response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0])
+                response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0], doc_language)
                 document_qa_list = self.format_split_text(response)
                 return {
                     "total_segments": total_segments * 20,
@@ -458,7 +472,8 @@ class IndexingRunner:
             splitter=splitter,
             processing_rule=processing_rule,
             tenant_id=dataset.tenant_id,
-            document_form=dataset_document.doc_form
+            document_form=dataset_document.doc_form,
+            document_language=dataset_document.doc_language
         )

         # save node to document segment
@@ -494,7 +509,8 @@ class IndexingRunner:
         return documents

     def _split_to_documents(self, text_docs: List[Document], splitter: TextSplitter,
-                            processing_rule: DatasetProcessRule, tenant_id: str, document_form: str) -> List[Document]:
+                            processing_rule: DatasetProcessRule, tenant_id: str,
+                            document_form: str, document_language: str) -> List[Document]:
         """
         Split the text documents into nodes.
         """
@@ -523,8 +539,9 @@ class IndexingRunner:
                 sub_documents = all_documents[i:i + 10]
                 for doc in sub_documents:
                     document_format_thread = threading.Thread(target=self.format_qa_document, kwargs={
-                        'flask_app': current_app._get_current_object(), 'tenant_id': tenant_id, 'document_node': doc,
-                        'all_qa_documents': all_qa_documents})
+                        'flask_app': current_app._get_current_object(),
+                        'tenant_id': tenant_id, 'document_node': doc, 'all_qa_documents': all_qa_documents,
+                        'document_language': document_language})
                     threads.append(document_format_thread)
                     document_format_thread.start()
                 for thread in threads:
@@ -532,14 +549,14 @@ class IndexingRunner:
             return all_qa_documents
         return all_documents

-    def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents):
+    def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents, document_language):
         format_documents = []
         if document_node.page_content is None or not document_node.page_content.strip():
             return
         with flask_app.app_context():
             try:
                 # qa model document
-                response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content)
+                response = LLMGenerator.generate_qa_document(tenant_id, document_node.page_content, document_language)
                 document_qa_list = self.format_split_text(response)
                 qa_documents = []
                 for result in document_qa_list:
@@ -641,7 +658,9 @@ class IndexingRunner:
         keyword_table_index = IndexBuilder.get_index(dataset, 'economy')

         embedding_model = ModelFactory.get_embedding_model(
-            tenant_id=dataset.tenant_id
+            tenant_id=dataset.tenant_id,
+            model_provider_name=dataset.embedding_model_provider,
+            model_name=dataset.embedding_model
         )

         # chunk nodes by chunk size
@@ -722,6 +741,32 @@ class IndexingRunner:
         DocumentSegment.query.filter_by(document_id=dataset_document_id).update(update_params)
         db.session.commit()

+    def batch_add_segments(self, segments: List[DocumentSegment], dataset: Dataset):
+        """
+        Batch add segments index processing
+        """
+        documents = []
+        for segment in segments:
+            document = Document(
+                page_content=segment.content,
+                metadata={
+                    "doc_id": segment.index_node_id,
+                    "doc_hash": segment.index_node_hash,
+                    "document_id": segment.document_id,
+                    "dataset_id": segment.dataset_id,
+                }
+            )
+            documents.append(document)
+        # save vector index
+        index = IndexBuilder.get_index(dataset, 'high_quality')
+        if index:
+            index.add_texts(documents, duplicate_check=True)
+
+        # save keyword index
+        index = IndexBuilder.get_index(dataset, 'economy')
+        if index:
+            index.add_texts(documents)
+
+
 class DocumentIsPausedException(Exception):
     pass
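batch_add_segments funnels imported segments through the same index builders as regular indexing, so vector and keyword indices stay consistent. A plausible caller, assuming the batch_create_segment_to_index_task named earlier hands it persisted DocumentSegment rows (a sketch, not the actual task body):

    # Hypothetical driver; the real Celery task body is not part of this diff.
    segments = DocumentSegment.query.filter_by(document_id=document_id).all()
    IndexingRunner().batch_add_segments(segments, dataset)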
@@ -44,13 +44,13 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
 )

 GENERATOR_QA_PROMPT = (
-    "Please respond according to the language of the user's input text. If the text is in language [A], you must also reply in language [A].\n"
+    'The user will send a long text. Please think step by step.'
     'Step 1: Understand and summarize the main content of this text.\n'
     'Step 2: What key information or concepts are mentioned in this text?\n'
     'Step 3: Decompose or combine multiple pieces of information and concepts.\n'
     'Step 4: Generate 20 questions and answers based on these key information and concepts.'
     'The questions should be clear and detailed, and the answers should be detailed and complete.\n'
-    "Answer in the following format: Q1:\nA1:\nQ2:\nA2:...\n"
+    "Answer must be the language:{language} and in the following format: Q1:\nA1:\nQ2:\nA2:...\n"
 )

 RULE_CONFIG_GENERATE_TEMPLATE = """Given MY INTENDED AUDIENCES and HOPING TO SOLVE using a language model, please select \
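Formatting the template pins the answer language, e.g. (illustrative):

    prompt = GENERATOR_QA_PROMPT.format(language='English')
    # The final instruction line becomes:
    # Answer must be the language:English and in the following format: Q1:\nA1:\nQ2:\nA2:...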
@@ -9,6 +9,7 @@ from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCa
 from core.embedding.cached_embedding import CacheEmbedding
 from core.index.keyword_table_index.keyword_table_index import KeywordTableIndex, KeywordTableConfig
 from core.index.vector_index.vector_index import VectorIndex
+from core.model_providers.error import LLMBadRequestError, ProviderTokenNotInitError
 from core.model_providers.model_factory import ModelFactory
 from extensions.ext_database import db
 from models.dataset import Dataset, DocumentSegment
@@ -70,10 +71,17 @@ class DatasetRetrieverTool(BaseTool):
             documents = kw_table_index.search(query, search_kwargs={'k': self.k})
             return str("\n".join([document.page_content for document in documents]))
         else:
-            embedding_model = ModelFactory.get_embedding_model(
-                tenant_id=dataset.tenant_id
-            )

+            try:
+                embedding_model = ModelFactory.get_embedding_model(
+                    tenant_id=dataset.tenant_id,
+                    model_provider_name=dataset.embedding_model_provider,
+                    model_name=dataset.embedding_model
+                )
+            except LLMBadRequestError:
+                return ''
+            except ProviderTokenNotInitError:
+                return ''
             embeddings = CacheEmbedding(embedding_model)

             vector_index = VectorIndex(
@@ -0,0 +1,32 @@
+"""add_qa_document_language
+
+Revision ID: 2c8af9671032
+Revises: 8d2d099ceb74
+Create Date: 2023-08-01 18:57:27.294973
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '2c8af9671032'
+down_revision = '5022897aaceb'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('documents', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('doc_language', sa.String(length=255), nullable=True))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('documents', schema=None) as batch_op:
+        batch_op.drop_column('doc_language')
+
+    # ### end Alembic commands ###
@@ -0,0 +1,34 @@
+"""add_dataset_model_name
+
+Revision ID: e8883b0148c9
+Revises: 2c8af9671032
+Create Date: 2023-08-15 20:54:58.936787
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'e8883b0148c9'
+down_revision = '2c8af9671032'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('datasets', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('embedding_model', sa.String(length=255), server_default=sa.text("'text-embedding-ada-002'::character varying"), nullable=False))
+        batch_op.add_column(sa.Column('embedding_model_provider', sa.String(length=255), server_default=sa.text("'openai'::character varying"), nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('datasets', schema=None) as batch_op:
+        batch_op.drop_column('embedding_model_provider')
+        batch_op.drop_column('embedding_model')
+
+    # ### end Alembic commands ###
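On PostgreSQL, the second migration amounts to roughly the following DDL (an approximation; Alembic's batch_alter_table may emit it differently on other backends):

    ALTER TABLE datasets
        ADD COLUMN embedding_model VARCHAR(255) NOT NULL
            DEFAULT 'text-embedding-ada-002';
    ALTER TABLE datasets
        ADD COLUMN embedding_model_provider VARCHAR(255) NOT NULL
            DEFAULT 'openai';

Existing datasets therefore default to OpenAI's ada-002, matching the behavior before this change.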
@@ -36,6 +36,10 @@ class Dataset(db.Model):
     updated_by = db.Column(UUID, nullable=True)
     updated_at = db.Column(db.DateTime, nullable=False,
                            server_default=db.text('CURRENT_TIMESTAMP(0)'))
+    embedding_model = db.Column(db.String(
+        255), nullable=False, server_default=db.text("'text-embedding-ada-002'::character varying"))
+    embedding_model_provider = db.Column(db.String(
+        255), nullable=False, server_default=db.text("'openai'::character varying"))

     @property
     def dataset_keyword_table(self):
@@ -209,6 +213,7 @@ class Document(db.Model):
     doc_metadata = db.Column(db.JSON, nullable=True)
     doc_form = db.Column(db.String(
         255), nullable=False, server_default=db.text("'text_model'::character varying"))
+    doc_language = db.Column(db.String(255), nullable=True)

     DATA_SOURCES = ['upload_file', 'notion_import']
@@ -48,3 +48,4 @@ dashscope~=1.5.0
 huggingface_hub~=0.16.4
 transformers~=4.31.0
 stripe~=5.5.0
+pandas==1.5.3
@@ -9,6 +9,7 @@ from typing import Optional, List
 from flask import current_app
 from sqlalchemy import func

+from core.index.index import IndexBuilder
 from core.model_providers.model_factory import ModelFactory
 from extensions.ext_redis import redis_client
 from flask_login import current_user
@@ -25,14 +26,16 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
 from tasks.document_indexing_task import document_indexing_task
 from tasks.document_indexing_update_task import document_indexing_update_task
 from tasks.create_segment_to_index_task import create_segment_to_index_task
 from tasks.update_segment_index_task import update_segment_index_task
-from tasks.update_segment_keyword_index_task\
-    import update_segment_keyword_index_task
+from tasks.recover_document_indexing_task import recover_document_indexing_task
+from tasks.update_segment_keyword_index_task import update_segment_keyword_index_task
+from tasks.delete_segment_from_index_task import delete_segment_from_index_task


 class DatasetService:
@@ -88,12 +91,16 @@ class DatasetService:
         if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first():
             raise DatasetNameDuplicateError(
                 f'Dataset with name {name} already exists.')
+        embedding_model = ModelFactory.get_embedding_model(
+            tenant_id=current_user.current_tenant_id
+        )
         dataset = Dataset(name=name, indexing_technique=indexing_technique)
         # dataset = Dataset(name=name, provider=provider, config=config)
         dataset.created_by = account.id
         dataset.updated_by = account.id
         dataset.tenant_id = tenant_id
+        dataset.embedding_model_provider = embedding_model.model_provider.provider_name
+        dataset.embedding_model = embedding_model.name
         db.session.add(dataset)
         db.session.commit()
         return dataset
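Dataset creation snapshots whatever embedding model the tenant resolves to at that moment, so a later provider switch does not silently change how existing datasets embed text. Illustrative outcome (the creation call shape is hypothetical; attribute names are from this hunk):

    dataset = DatasetService.create_empty_dataset(...)  # hypothetical signature
    dataset.embedding_model_provider  # e.g. 'openai'
    dataset.embedding_model           # e.g. 'text-embedding-ada-002'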
@@ -372,7 +379,7 @@ class DocumentService:
         indexing_cache_key = 'document_{}_is_paused'.format(document.id)
         redis_client.delete(indexing_cache_key)
         # trigger async task
-        document_indexing_task.delay(document.dataset_id, document.id)
+        recover_document_indexing_task.delay(document.dataset_id, document.id)

     @staticmethod
     def get_documents_position(dataset_id):
@@ -450,6 +457,7 @@ class DocumentService:
                 document = DocumentService.save_document(dataset, dataset_process_rule.id,
                                                          document_data["data_source"]["type"],
                                                          document_data["doc_form"],
+                                                         document_data["doc_language"],
                                                          data_source_info, created_from, position,
                                                          account, file_name, batch)
                 db.session.add(document)
@@ -495,20 +503,11 @@ class DocumentService:
                     document = DocumentService.save_document(dataset, dataset_process_rule.id,
                                                              document_data["data_source"]["type"],
                                                              document_data["doc_form"],
+                                                             document_data["doc_language"],
                                                              data_source_info, created_from, position,
                                                              account, page['page_name'], batch)
-                    # if page['type'] == 'database':
-                    #     document.splitting_completed_at = datetime.datetime.utcnow()
-                    #     document.cleaning_completed_at = datetime.datetime.utcnow()
-                    #     document.parsing_completed_at = datetime.datetime.utcnow()
-                    #     document.completed_at = datetime.datetime.utcnow()
-                    #     document.indexing_status = 'completed'
-                    #     document.word_count = 0
-                    #     document.tokens = 0
-                    #     document.indexing_latency = 0
                     db.session.add(document)
                     db.session.flush()
-                    # if page['type'] != 'database':
                     document_ids.append(document.id)
                     documents.append(document)
                     position += 1
@@ -520,15 +519,15 @@ class DocumentService:
             db.session.commit()

         # trigger async task
-        #document_index_created.send(dataset.id, document_ids=document_ids)
         document_indexing_task.delay(dataset.id, document_ids)

         return documents, batch

     @staticmethod
     def save_document(dataset: Dataset, process_rule_id: str, data_source_type: str, document_form: str,
-                      data_source_info: dict, created_from: str, position: int, account: Account, name: str,
-                      batch: str):
+                      document_language: str, data_source_info: dict, created_from: str, position: int,
+                      account: Account,
+                      name: str, batch: str):
         document = Document(
             tenant_id=dataset.tenant_id,
             dataset_id=dataset.id,
@@ -540,7 +539,8 @@ class DocumentService:
             name=name,
             created_from=created_from,
             created_by=account.id,
-            doc_form=document_form
+            doc_form=document_form,
+            doc_language=document_language
         )
         return document

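Note that doc_language was inserted into the positional parameter list ahead of data_source_info, so every caller must be updated in lockstep; a call left in the old argument order would silently bind data_source_info into document_language. A sketch of the updated call, mirroring the two call sites patched above (all variables are placeholders taken from the surrounding code):

# Mirrors the updated call sites in this commit; values are placeholders.
document = DocumentService.save_document(
    dataset, dataset_process_rule.id,
    document_data["data_source"]["type"],
    document_data["doc_form"],
    document_data["doc_language"],   # new argument, before data_source_info
    data_source_info, created_from, position,
    account, file_name, batch)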
@@ -654,13 +654,18 @@ class DocumentService:
         tenant_document_count = int(current_app.config['TENANT_DOCUMENT_COUNT'])
         if documents_count > tenant_document_count:
             raise ValueError(f"over document limit {tenant_document_count}.")
+        embedding_model = ModelFactory.get_embedding_model(
+            tenant_id=tenant_id
+        )
         # save dataset
         dataset = Dataset(
             tenant_id=tenant_id,
             name='',
             data_source_type=document_data["data_source"]["type"],
             indexing_technique=document_data["indexing_technique"],
-            created_by=account.id
+            created_by=account.id,
+            embedding_model=embedding_model.name,
+            embedding_model_provider=embedding_model.model_provider.provider_name
         )

         db.session.add(dataset)
@@ -870,13 +875,15 @@ class SegmentService:
                 raise ValueError("Answer is required")

     @classmethod
-    def create_segment(cls, args: dict, document: Document):
+    def create_segment(cls, args: dict, document: Document, dataset: Dataset):
         content = args['content']
         doc_id = str(uuid.uuid4())
         segment_hash = helper.generate_text_hash(content)

         embedding_model = ModelFactory.get_embedding_model(
-            tenant_id=document.tenant_id
+            tenant_id=dataset.tenant_id,
+            model_provider_name=dataset.embedding_model_provider,
+            model_name=dataset.embedding_model
         )

         # calc embedding use tokens
@@ -894,6 +901,9 @@ class SegmentService:
             content=content,
             word_count=len(content),
             tokens=tokens,
+            status='completed',
+            indexing_at=datetime.datetime.utcnow(),
+            completed_at=datetime.datetime.utcnow(),
             created_by=current_user.id
         )
         if document.doc_form == 'qa_model':
@@ -901,17 +911,27 @@ class SegmentService:

         db.session.add(segment_document)
         db.session.commit()
-        indexing_cache_key = 'segment_{}_indexing'.format(segment_document.id)
-        redis_client.setex(indexing_cache_key, 600, 1)
-        create_segment_to_index_task.delay(segment_document.id, args['keywords'])
-        return segment_document
+
+        # save vector index
+        try:
+            VectorService.create_segment_vector(args['keywords'], segment_document, dataset)
+        except Exception as e:
+            logging.exception("create segment index failed")
+            segment_document.enabled = False
+            segment_document.disabled_at = datetime.datetime.utcnow()
+            segment_document.status = 'error'
+            segment_document.error = str(e)
+            db.session.commit()
+        segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_document.id).first()
+        return segment

     @classmethod
-    def update_segment(cls, args: dict, segment: DocumentSegment, document: Document):
+    def update_segment(cls, args: dict, segment: DocumentSegment, document: Document, dataset: Dataset):
         indexing_cache_key = 'segment_{}_indexing'.format(segment.id)
         cache_result = redis_client.get(indexing_cache_key)
         if cache_result is not None:
             raise ValueError("Segment is indexing, please try again later")
+        try:
             content = args['content']
             if segment.content == content:
                 if document.doc_form == 'qa_model':
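With this hunk, a segment is indexed synchronously inside create_segment instead of being handed to create_segment_to_index_task; an indexing failure no longer surfaces as an exception but is recorded on the row (status='error', error=str(e), enabled=False) for the UI to display. A caller-side sketch under that assumption (argument values are placeholders):

# The controller now passes the owning dataset so indexing can use the
# dataset's pinned embedding model; values below are placeholders.
segment = SegmentService.create_segment(
    args={'content': 'some segment text', 'keywords': []},
    document=document,
    dataset=dataset)
# Errors are persisted rather than raised:
assert segment.status in ('completed', 'error')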
@@ -921,13 +941,19 @@ class SegmentService:
                     db.session.add(segment)
                     db.session.commit()
                     # update segment index task
-                    redis_client.setex(indexing_cache_key, 600, 1)
-                    update_segment_keyword_index_task.delay(segment.id)
+                    if args['keywords']:
+                        kw_index = IndexBuilder.get_index(dataset, 'economy')
+                        # delete from keyword index
+                        kw_index.delete_by_ids([segment.index_node_id])
+                        # save keyword index
+                        kw_index.update_segment_keywords_index(segment.index_node_id, segment.keywords)
             else:
                 segment_hash = helper.generate_text_hash(content)

                 embedding_model = ModelFactory.get_embedding_model(
-                    tenant_id=document.tenant_id
+                    tenant_id=dataset.tenant_id,
+                    model_provider_name=dataset.embedding_model_provider,
+                    model_name=dataset.embedding_model
                 )

                 # calc embedding use tokens
@@ -936,14 +962,37 @@ class SegmentService:
                 segment.index_node_hash = segment_hash
                 segment.word_count = len(content)
                 segment.tokens = tokens
-                segment.status = 'updating'
+                segment.status = 'completed'
+                segment.indexing_at = datetime.datetime.utcnow()
+                segment.completed_at = datetime.datetime.utcnow()
                 segment.updated_by = current_user.id
                 segment.updated_at = datetime.datetime.utcnow()
                 if document.doc_form == 'qa_model':
                     segment.answer = args['answer']
                 db.session.add(segment)
                 db.session.commit()
-                # update segment index task
-                redis_client.setex(indexing_cache_key, 600, 1)
-                update_segment_index_task.delay(segment.id, args['keywords'])
+                # update segment vector index
+                VectorService.create_segment_vector(args['keywords'], segment, dataset)
+        except Exception as e:
+            logging.exception("update segment index failed")
+            segment.enabled = False
+            segment.disabled_at = datetime.datetime.utcnow()
+            segment.status = 'error'
+            segment.error = str(e)
+            db.session.commit()
+        segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment.id).first()
         return segment

+    @classmethod
+    def delete_segment(cls, segment: DocumentSegment, document: Document, dataset: Dataset):
+        indexing_cache_key = 'segment_{}_delete_indexing'.format(segment.id)
+        cache_result = redis_client.get(indexing_cache_key)
+        if cache_result is not None:
+            raise ValueError("Segment is deleting.")
+        # send delete segment index task
+        redis_client.setex(indexing_cache_key, 600, 1)
+        # enabled segment need to delete index
+        if segment.enabled:
+            delete_segment_from_index_task.delay(segment.id, segment.index_node_id, dataset.id, document.id)
+        db.session.delete(segment)
+        db.session.commit()
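delete_segment guards against concurrent deletion with a short-lived Redis flag, removes the database row immediately, and defers index cleanup to the new Celery task, but only for enabled segments (disabled ones have already been pulled from the indexes). A usage sketch under those assumptions; the objects come from the request context:

# Hedged usage sketch; segment/document/dataset are loaded by the controller.
SegmentService.delete_segment(segment, document, dataset)
# For an enabled segment this enqueues, on the 'dataset' queue:
#   delete_segment_from_index_task.delay(segment.id, segment.index_node_id, dataset.id, document.id)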
@@ -29,7 +29,9 @@ class HitTestingService:
         }

         embedding_model = ModelFactory.get_embedding_model(
-            tenant_id=dataset.tenant_id
+            tenant_id=dataset.tenant_id,
+            model_provider_name=dataset.embedding_model_provider,
+            model_name=dataset.embedding_model
        )

         embeddings = CacheEmbedding(embedding_model)
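Hit testing must embed the query with the exact model that produced the stored vectors; embedding the query with a different model would place it in an incompatible vector space and make the similarity scores meaningless. The dataset's pinned provider and model now drive the lookup, and the resolved model feeds the CacheEmbedding wrapper already used by this service (imports not shown in this hunk):

# Query-time model resolution now honors the dataset's pinned embedding model.
embedding_model = ModelFactory.get_embedding_model(
    tenant_id=dataset.tenant_id,
    model_provider_name=dataset.embedding_model_provider,
    model_name=dataset.embedding_model
)
embeddings = CacheEmbedding(embedding_model)  # CacheEmbedding comes from the service's existing imports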
api/services/vector_service.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+from typing import Optional, List
+
+from langchain.schema import Document
+
+from core.index.index import IndexBuilder
+
+from models.dataset import Dataset, DocumentSegment
+
+
+class VectorService:
+
+    @classmethod
+    def create_segment_vector(cls, keywords: Optional[List[str]], segment: DocumentSegment, dataset: Dataset):
+        document = Document(
+            page_content=segment.content,
+            metadata={
+                "doc_id": segment.index_node_id,
+                "doc_hash": segment.index_node_hash,
+                "document_id": segment.document_id,
+                "dataset_id": segment.dataset_id,
+            }
+        )
+
+        # save vector index
+        index = IndexBuilder.get_index(dataset, 'high_quality')
+        if index:
+            index.add_texts([document], duplicate_check=True)
+
+        # save keyword index
+        index = IndexBuilder.get_index(dataset, 'economy')
+        if index:
+            if keywords and len(keywords) > 0:
+                index.create_segment_keywords(segment.index_node_id, keywords)
+            else:
+                index.add_texts([document])
+
+    @classmethod
+    def update_segment_vector(cls, keywords: Optional[List[str]], segment: DocumentSegment, dataset: Dataset):
+        # update segment index task
+        vector_index = IndexBuilder.get_index(dataset, 'high_quality')
+        kw_index = IndexBuilder.get_index(dataset, 'economy')
+        # delete from vector index
+        if vector_index:
+            vector_index.delete_by_ids([segment.index_node_id])
+
+        # delete from keyword index
+        kw_index.delete_by_ids([segment.index_node_id])
+
+        # add new index
+        document = Document(
+            page_content=segment.content,
+            metadata={
+                "doc_id": segment.index_node_id,
+                "doc_hash": segment.index_node_hash,
+                "document_id": segment.document_id,
+                "dataset_id": segment.dataset_id,
+            }
+        )
+
+        # save vector index
+        if vector_index:
+            vector_index.add_texts([document], duplicate_check=True)
+
+        # save keyword index
+        if keywords and len(keywords) > 0:
+            kw_index.create_segment_keywords(segment.index_node_id, keywords)
+        else:
+            kw_index.add_texts([document])
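VectorService centralizes the write path that SegmentService previously delegated to Celery tasks. IndexBuilder.get_index(dataset, ...) returns a falsy value when the dataset does not use that index type, which is why each write is guarded. A usage sketch under that assumption (segment and dataset are placeholders from the service layer):

# Hedged usage sketch; segment/dataset come from the calling service.
from services.vector_service import VectorService

# First write: add to the vector index ('high_quality') and keyword index ('economy').
VectorService.create_segment_vector(None, segment, dataset)
# Re-index after an edit: old node ids are deleted from both indexes, then re-added.
VectorService.update_segment_vector(['keyword-a'], segment, dataset)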
api/tasks/batch_create_segment_to_index_task.py (new file, 95 lines)
@@ -0,0 +1,95 @@
+import datetime
+import logging
+import time
+import uuid
+from typing import Optional, List
+
+import click
+from celery import shared_task
+from sqlalchemy import func
+from werkzeug.exceptions import NotFound
+
+from core.index.index import IndexBuilder
+from core.indexing_runner import IndexingRunner
+from core.model_providers.model_factory import ModelFactory
+from extensions.ext_database import db
+from extensions.ext_redis import redis_client
+from libs import helper
+from models.dataset import DocumentSegment, Dataset, Document
+
+
+@shared_task(queue='dataset')
+def batch_create_segment_to_index_task(job_id: str, content: List, dataset_id: str, document_id: str,
+                                       tenant_id: str, user_id: str):
+    """
+    Async batch create segment to index
+    :param job_id:
+    :param content:
+    :param dataset_id:
+    :param document_id:
+    :param tenant_id:
+    :param user_id:
+
+    Usage: batch_create_segment_to_index_task.delay(segment_id)
+    """
+    logging.info(click.style('Start batch create segment jobId: {}'.format(job_id), fg='green'))
+    start_at = time.perf_counter()
+
+    indexing_cache_key = 'segment_batch_import_{}'.format(job_id)
+
+    try:
+        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+        if not dataset:
+            raise ValueError('Dataset not exist.')
+
+        dataset_document = db.session.query(Document).filter(Document.id == document_id).first()
+        if not dataset_document:
+            raise ValueError('Document not exist.')
+
+        if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != 'completed':
+            raise ValueError('Document is not available.')
+        document_segments = []
+        for segment in content:
+            content = segment['content']
+            doc_id = str(uuid.uuid4())
+            segment_hash = helper.generate_text_hash(content)
+            embedding_model = ModelFactory.get_embedding_model(
+                tenant_id=dataset.tenant_id,
+                model_provider_name=dataset.embedding_model_provider,
+                model_name=dataset.embedding_model
+            )
+
+            # calc embedding use tokens
+            tokens = embedding_model.get_num_tokens(content)
+            max_position = db.session.query(func.max(DocumentSegment.position)).filter(
+                DocumentSegment.document_id == dataset_document.id
+            ).scalar()
+            segment_document = DocumentSegment(
+                tenant_id=tenant_id,
+                dataset_id=dataset_id,
+                document_id=document_id,
+                index_node_id=doc_id,
+                index_node_hash=segment_hash,
+                position=max_position + 1 if max_position else 1,
+                content=content,
+                word_count=len(content),
+                tokens=tokens,
+                created_by=user_id,
+                indexing_at=datetime.datetime.utcnow(),
+                status='completed',
+                completed_at=datetime.datetime.utcnow()
+            )
+            if dataset_document.doc_form == 'qa_model':
+                segment_document.answer = segment['answer']
+            db.session.add(segment_document)
+            document_segments.append(segment_document)
+        # add index to db
+        indexing_runner = IndexingRunner()
+        indexing_runner.batch_add_segments(document_segments, dataset)
+        db.session.commit()
+        redis_client.setex(indexing_cache_key, 600, 'completed')
+        end_at = time.perf_counter()
+        logging.info(click.style('Segment batch created job: {} latency: {}'.format(job_id, end_at - start_at), fg='green'))
+    except Exception as e:
+        logging.exception("Segments batch created index failed:{}".format(str(e)))
+        redis_client.setex(indexing_cache_key, 600, 'error')
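Two reading notes on this file: the docstring's usage line (delay(segment_id)) is a leftover from the single-segment task and does not match the six-argument signature, and the loop variable content shadows the content list parameter after the first iteration (harmless here because the for statement already captured the original list). An enqueue sketch with the full argument list (all values are placeholders); progress is reported through the segment_batch_import_{job_id} Redis key, set to 'completed' or 'error' for 600 seconds:

# Hedged enqueue sketch; dataset/document/tenant_id/current_user are placeholders.
import uuid
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task

job_id = str(uuid.uuid4())
rows = [{'content': 'segment text', 'answer': 'answer text (qa_model only)'}]
batch_create_segment_to_index_task.delay(
    job_id, rows, dataset.id, document.id, tenant_id, current_user.id)
# Poll progress: redis_client.get('segment_batch_import_{}'.format(job_id)) -> 'completed' | 'error'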
api/tasks/delete_segment_from_index_task.py (new file, 58 lines)
@@ -0,0 +1,58 @@
+import logging
+import time
+
+import click
+from celery import shared_task
+from werkzeug.exceptions import NotFound
+
+from core.index.index import IndexBuilder
+from extensions.ext_database import db
+from extensions.ext_redis import redis_client
+from models.dataset import DocumentSegment, Dataset, Document
+
+
+@shared_task(queue='dataset')
+def delete_segment_from_index_task(segment_id: str, index_node_id: str, dataset_id: str, document_id: str):
+    """
+    Async Remove segment from index
+    :param segment_id:
+    :param index_node_id:
+    :param dataset_id:
+    :param document_id:
+
+    Usage: delete_segment_from_index_task.delay(segment_id)
+    """
+    logging.info(click.style('Start delete segment from index: {}'.format(segment_id), fg='green'))
+    start_at = time.perf_counter()
+    indexing_cache_key = 'segment_{}_delete_indexing'.format(segment_id)
+    try:
+        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+        if not dataset:
+            logging.info(click.style('Segment {} has no dataset, pass.'.format(segment_id), fg='cyan'))
+            return
+
+        dataset_document = db.session.query(Document).filter(Document.id == document_id).first()
+        if not dataset_document:
+            logging.info(click.style('Segment {} has no document, pass.'.format(segment_id), fg='cyan'))
+            return
+
+        if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != 'completed':
+            logging.info(click.style('Segment {} document status is invalid, pass.'.format(segment_id), fg='cyan'))
+            return
+
+        vector_index = IndexBuilder.get_index(dataset, 'high_quality')
+        kw_index = IndexBuilder.get_index(dataset, 'economy')
+
+        # delete from vector index
+        if vector_index:
+            vector_index.delete_by_ids([index_node_id])
+
+        # delete from keyword index
+        kw_index.delete_by_ids([index_node_id])
+
+        end_at = time.perf_counter()
+        logging.info(click.style('Segment deleted from index: {} latency: {}'.format(segment_id, end_at - start_at), fg='green'))
+    except Exception:
+        logging.exception("delete segment from index failed")
+    finally:
+        redis_client.delete(indexing_cache_key)
@@ -12,14 +12,14 @@ from models.dataset import DocumentSegment


 @shared_task(queue='dataset')
-def remove_segment_from_index_task(segment_id: str):
+def disable_segment_from_index_task(segment_id: str):
     """
-    Async Remove segment from index
+    Async disable segment from index
     :param segment_id:

-    Usage: remove_segment_from_index.delay(segment_id)
+    Usage: disable_segment_from_index_task.delay(segment_id)
     """
-    logging.info(click.style('Start remove segment from index: {}'.format(segment_id), fg='green'))
+    logging.info(click.style('Start disable segment from index: {}'.format(segment_id), fg='green'))
     start_at = time.perf_counter()

     segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
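The rename from remove_* to disable_* matches what the task actually does: the segment row survives and only its index entries are dropped, so the segment can be re-enabled later; permanent removal is now the job of delete_segment_from_index_task above. A minimal usage sketch, assuming the task module follows the function's new name:

# Hedged sketch; the module path mirrors the renamed function and is an assumption here.
disable_segment_from_index_task.delay(segment.id)  # row is kept; only index entries are removed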
@@ -52,17 +52,6 @@ def update_segment_keyword_index_task(segment_id: str):
         # delete from keyword index
         kw_index.delete_by_ids([segment.index_node_id])

-        # add new index
-        document = Document(
-            page_content=segment.content,
-            metadata={
-                "doc_id": segment.index_node_id,
-                "doc_hash": segment.index_node_hash,
-                "document_id": segment.document_id,
-                "dataset_id": segment.dataset_id,
-            }
-        )
-
         # save keyword index
         index = IndexBuilder.get_index(dataset, 'economy')
         if index:
@@ -5,13 +5,14 @@ import Link from 'next/link'
 import type { MouseEventHandler } from 'react'
 import { useCallback, useState } from 'react'
 import { useTranslation } from 'react-i18next'
-import classNames from 'classnames'
+import cn from 'classnames'
 import style from '../list.module.css'
 import Confirm from '@/app/components/base/confirm'
 import { ToastContext } from '@/app/components/base/toast'
 import { deleteDataset } from '@/service/datasets'
 import AppIcon from '@/app/components/base/app-icon'
 import type { DataSet } from '@/models/datasets'
+import Tooltip from '@/app/components/base/tooltip'

 export type DatasetCardProps = {
   dataset: DataSet
@@ -45,26 +46,36 @@ const DatasetCard = ({

   return (
     <>
-      <Link href={`/datasets/${dataset.id}/documents`} className={style.listItem}>
+      <Link href={`/datasets/${dataset.id}/documents`} className={cn(style.listItem)}>
         <div className={style.listItemTitle}>
-          <AppIcon size='small' />
-          <div className={style.listItemHeading}>
-            <div className={style.listItemHeadingContent}>{dataset.name}</div>
+          <AppIcon size='small' className={cn(!dataset.embedding_available && style.unavailable)} />
+          <div className={cn(style.listItemHeading, !dataset.embedding_available && style.unavailable)}>
+            <div className={style.listItemHeadingContent}>
+              {dataset.name}
+            </div>
           </div>
+          {!dataset.embedding_available && (
+            <Tooltip
+              selector={`dataset-tag-${dataset.id}`}
+              htmlContent={t('dataset.unavailableTip')}
+            >
+              <span className='px-1 border boder-gray-200 rounded-md text-gray-500 text-xs font-normal leading-[18px]'>{t('dataset.unavailable')}</span>
+            </Tooltip>
+          )}
           <span className={style.deleteDatasetIcon} onClick={onDeleteClick} />
         </div>
-        <div className={style.listItemDescription}>{dataset.description}</div>
-        <div className={classNames(style.listItemFooter, style.datasetCardFooter)}>
+        <div className={cn(style.listItemDescription, !dataset.embedding_available && style.unavailable)}>{dataset.description}</div>
+        <div className={cn(style.listItemFooter, style.datasetCardFooter, !dataset.embedding_available && style.unavailable)}>
           <span className={style.listItemStats}>
-            <span className={classNames(style.listItemFooterIcon, style.docIcon)} />
+            <span className={cn(style.listItemFooterIcon, style.docIcon)} />
             {dataset.document_count}{t('dataset.documentCount')}
           </span>
           <span className={style.listItemStats}>
-            <span className={classNames(style.listItemFooterIcon, style.textIcon)} />
+            <span className={cn(style.listItemFooterIcon, style.textIcon)} />
             {Math.round(dataset.word_count / 1000)}{t('dataset.wordCount')}
           </span>
           <span className={style.listItemStats}>
-            <span className={classNames(style.listItemFooterIcon, style.applicationIcon)} />
+            <span className={cn(style.listItemFooterIcon, style.applicationIcon)} />
             {dataset.app_count}{t('dataset.appCount')}
           </span>
         </div>
@@ -1,13 +1,7 @@
-import classNames from 'classnames'
-import { getLocaleOnServer } from '@/i18n/server'
-import { useTranslation } from '@/i18n/i18next-serverside-config'
 import Datasets from './Datasets'
 import DatasetFooter from './DatasetFooter'

 const AppList = async () => {
-  const locale = getLocaleOnServer()
-  const { t } = await useTranslation(locale, 'dataset')
-
   return (
     <div className='flex flex-col overflow-auto bg-gray-100 shrink-0 grow'>
       <Datasets />
@@ -192,3 +192,11 @@
   @apply inline-flex items-center mb-2 text-sm font-medium;
 }
 /* #endregion new app dialog */
+
+.unavailable {
+  @apply opacity-50;
+}
+
+.listItem:hover .unavailable {
+  @apply opacity-100;
+}
@@ -7,6 +7,7 @@ import TypeIcon from '../type-icon'
 import RemoveIcon from '../../base/icons/remove-icon'
 import s from './style.module.css'
 import { formatNumber } from '@/utils/format'
+import Tooltip from '@/app/components/base/tooltip'

 export type ICardItemProps = {
   className?: string
@@ -36,10 +37,22 @@ const CardItem: FC<ICardItemProps> = ({
       'flex items-center justify-between rounded-xl px-3 py-2.5 bg-white border border-gray-200 cursor-pointer')
     }>
       <div className='shrink-0 flex items-center space-x-2'>
-        <TypeIcon type="upload_file" />
+        <div className={cn(!config.embedding_available && 'opacity-50')}>
+          <TypeIcon type="upload_file" />
+        </div>
         <div>
-          <div className='w-[160px] text-[13px] leading-[18px] font-medium text-gray-800 overflow-hidden text-ellipsis whitespace-nowrap'>{config.name}</div>
-          <div className='flex text-xs text-gray-500'>
+          <div className='flex items-center w-[160px] mr-1'>
+            <div className={cn('text-[13px] leading-[18px] font-medium text-gray-800 overflow-hidden text-ellipsis whitespace-nowrap', !config.embedding_available && 'opacity-50')}>{config.name}</div>
+            {!config.embedding_available && (
+              <Tooltip
+                selector={`unavailable-tag-${config.id}`}
+                htmlContent={t('dataset.unavailableTip')}
+              >
+                <span className='shrink-0 px-1 border boder-gray-200 rounded-md text-gray-500 text-xs font-normal leading-[18px]'>{t('dataset.unavailable')}</span>
+              </Tooltip>
+            )}
+          </div>
+          <div className={cn('flex text-xs text-gray-500', !config.embedding_available && 'opacity-50')}>
             {formatNumber(config.word_count)} {t('appDebug.feature.dataSet.words')} · {formatNumber(config.document_count)} {t('appDebug.feature.dataSet.textBlocks')}
           </div>
         </div>
       </div>
@@ -120,15 +120,24 @@ const SelectDataSet: FC<ISelectDataSetProps> = ({
             {datasets.map(item => (
               <div
                 key={item.id}
-                className={cn(s.item, selected.some(i => i.id === item.id) && s.selected, 'flex justify-between items-center h-10 px-2 rounded-lg bg-white border border-gray-200 cursor-pointer')}
-                onClick={() => toggleSelect(item)}
+                className={cn(s.item, selected.some(i => i.id === item.id) && s.selected, 'flex justify-between items-center h-10 px-2 rounded-lg bg-white border border-gray-200 cursor-pointer', !item.embedding_available && s.disabled)}
+                onClick={() => {
+                  if (!item.embedding_available)
+                    return
+                  toggleSelect(item)
+                }}
               >
-                <div className='flex items-center space-x-2'>
-                  <TypeIcon type="upload_file" size='md' />
-                  <div className='max-w-[200px] text-[13px] font-medium text-gray-800 overflow-hidden text-ellipsis whitespace-nowrap'>{item.name}</div>
+                <div className='mr-1 flex items-center'>
+                  <div className={cn('mr-2', !item.embedding_available && 'opacity-50')}>
+                    <TypeIcon type="upload_file" size='md' />
+                  </div>
+                  <div className={cn('max-w-[200px] text-[13px] font-medium text-gray-800 overflow-hidden text-ellipsis whitespace-nowrap', !item.embedding_available && 'opacity-50 !max-w-[120px]')}>{item.name}</div>
+                  {!item.embedding_available && (
+                    <span className='ml-1 shrink-0 px-1 border boder-gray-200 rounded-md text-gray-500 text-xs font-normal leading-[18px]'>{t('dataset.unavailable')}</span>
+                  )}
                 </div>

-                <div className='flex text-xs text-gray-500 overflow-hidden whitespace-nowrap'>
+                <div className={cn('shrink-0 flex text-xs text-gray-500 overflow-hidden whitespace-nowrap', !item.embedding_available && 'opacity-50')}>
                   <span className='max-w-[100px] overflow-hidden text-ellipsis whitespace-nowrap'>{formatNumber(item.word_count)}</span>
                   {t('appDebug.feature.dataSet.words')}
                   <span className='px-0.5'>·</span>
@@ -7,3 +7,7 @@
   background: #F5F8FF;
   border-color: #528BFF;
 }
+
+.item.disabled {
+  @apply bg-white border-gray-200 cursor-default;
+}
@@ -1,9 +1,9 @@
-<svg width="12" height="12" viewBox="0 0 12 12" fill="none" xmlns="http://www.w3.org/2000/svg">
+<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
 <g id="Icon">
 <g id="Icon_2">
-<path d="M6 6.5C6.27614 6.5 6.5 6.27614 6.5 6C6.5 5.72386 6.27614 5.5 6 5.5C5.72386 5.5 5.5 5.72386 5.5 6C5.5 6.27614 5.72386 6.5 6 6.5Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+<path d="M8.00008 8.66634C8.36827 8.66634 8.66675 8.36786 8.66675 7.99967C8.66675 7.63148 8.36827 7.33301 8.00008 7.33301C7.63189 7.33301 7.33341 7.63148 7.33341 7.99967C7.33341 8.36786 7.63189 8.66634 8.00008 8.66634Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
-<path d="M9.5 6.5C9.77614 6.5 10 6.27614 10 6C10 5.72386 9.77614 5.5 9.5 5.5C9.22386 5.5 9 5.72386 9 6C9 6.27614 9.22386 6.5 9.5 6.5Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+<path d="M12.6667 8.66634C13.0349 8.66634 13.3334 8.36786 13.3334 7.99967C13.3334 7.63148 13.0349 7.33301 12.6667 7.33301C12.2986 7.33301 12.0001 7.63148 12.0001 7.99967C12.0001 8.36786 12.2986 8.66634 12.6667 8.66634Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
-<path d="M2.5 6.5C2.77614 6.5 3 6.27614 3 6C3 5.72386 2.77614 5.5 2.5 5.5C2.22386 5.5 2 5.72386 2 6C2 6.27614 2.22386 6.5 2.5 6.5Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
+<path d="M3.33341 8.66634C3.7016 8.66634 4.00008 8.36786 4.00008 7.99967C4.00008 7.63148 3.7016 7.33301 3.33341 7.33301C2.96522 7.33301 2.66675 7.63148 2.66675 7.99967C2.66675 8.36786 2.96522 8.66634 3.33341 8.66634Z" stroke="#344054" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
 </g>
 </g>
 </svg>
Before: 803 B | After: 1.0 KiB
@@ -4,9 +4,9 @@
   "isRootNode": true,
   "name": "svg",
   "attributes": {
-    "width": "12",
-    "height": "12",
-    "viewBox": "0 0 12 12",
+    "width": "16",
+    "height": "16",
+    "viewBox": "0 0 16 16",
     "fill": "none",
     "xmlns": "http://www.w3.org/2000/svg"
   },
@@ -29,7 +29,7 @@
   "type": "element",
   "name": "path",
   "attributes": {
-    "d": "M6 6.5C6.27614 6.5 6.5 6.27614 6.5 6C6.5 5.72386 6.27614 5.5 6 5.5C5.72386 5.5 5.5 5.72386 5.5 6C5.5 6.27614 5.72386 6.5 6 6.5Z",
+    "d": "M8.00008 8.66634C8.36827 8.66634 8.66675 8.36786 8.66675 7.99967C8.66675 7.63148 8.36827 7.33301 8.00008 7.33301C7.63189 7.33301 7.33341 7.63148 7.33341 7.99967C7.33341 8.36786 7.63189 8.66634 8.00008 8.66634Z",
     "stroke": "currentColor",
     "stroke-width": "1.5",
     "stroke-linecap": "round",
@@ -41,7 +41,7 @@
   "type": "element",
   "name": "path",
   "attributes": {
-    "d": "M9.5 6.5C9.77614 6.5 10 6.27614 10 6C10 5.72386 9.77614 5.5 9.5 5.5C9.22386 5.5 9 5.72386 9 6C9 6.27614 9.22386 6.5 9.5 6.5Z",
+    "d": "M12.6667 8.66634C13.0349 8.66634 13.3334 8.36786 13.3334 7.99967C13.3334 7.63148 13.0349 7.33301 12.6667 7.33301C12.2986 7.33301 12.0001 7.63148 12.0001 7.99967C12.0001 8.36786 12.2986 8.66634 12.6667 8.66634Z",
     "stroke": "currentColor",
     "stroke-width": "1.5",
     "stroke-linecap": "round",
@@ -53,7 +53,7 @@
   "type": "element",
   "name": "path",
   "attributes": {
-    "d": "M2.5 6.5C2.77614 6.5 3 6.27614 3 6C3 5.72386 2.77614 5.5 2.5 5.5C2.22386 5.5 2 5.72386 2 6C2 6.27614 2.22386 6.5 2.5 6.5Z",
+    "d": "M3.33341 8.66634C3.7016 8.66634 4.00008 8.36786 4.00008 7.99967C4.00008 7.63148 3.7016 7.33301 3.33341 7.33301C2.96522 7.33301 2.66675 7.63148 2.66675 7.99967C2.66675 8.36786 2.96522 8.66634 3.33341 8.66634Z",
     "stroke": "currentColor",
     "stroke-width": "1.5",
     "stroke-linecap": "round",
@@ -9,6 +9,7 @@ type IPopover = {
   position?: 'bottom' | 'br'
   btnElement?: string | React.ReactNode
   btnClassName?: string | ((open: boolean) => string)
+  manualClose?: boolean
 }

 const timeoutDuration = 100
@@ -20,6 +21,7 @@ export default function CustomPopover({
   btnElement,
   className,
   btnClassName,
+  manualClose,
 }: IPopover) {
   const buttonRef = useRef<HTMLButtonElement>(null)
   const timeOutRef = useRef<NodeJS.Timeout | null>(null)
@@ -62,17 +64,14 @@ export default function CustomPopover({
       </Popover.Button>
       <Transition as={Fragment}>
         <Popover.Panel
-          className={`${s.popupPanel} ${
-            position === 'br'
-              ? 'right-0'
-              : 'transform -translate-x-1/2 left-1/2'
-          } ${className}`}
+          className={`${s.popupPanel} ${position === 'br' ? 'right-0' : 'translate-x-1/2 left-1/2'} ${className}`}
           {...(trigger !== 'hover'
             ? {}
            : {
               onMouseLeave: () => onMouseLeave(open),
               onMouseEnter: () => onMouseEnter(open),
-            })}
+            })
+          }
         >
           {({ close }) => (
             <div
@@ -82,10 +81,16 @@ export default function CustomPopover({
               : {
                 onMouseLeave: () => onMouseLeave(open),
                 onMouseEnter: () => onMouseEnter(open),
-              })}
+              })
+              }
            >
              {cloneElement(htmlContent as React.ReactElement, {
-                onClose: () => close(),
+                onClose: () => onMouseLeave(open),
+                ...(manualClose
+                  ? {
+                    onClick: close,
+                  }
+                  : {}),
              })}
            </div>
          )}
@@ -29,7 +29,7 @@ const ACCEPTS = [
   '.txt',
   // '.xls',
   '.xlsx',
-  '.csv',
+  // '.csv',
 ]

 const FileUploader = ({
@@ -2,12 +2,14 @@
 'use client'
 import React, { useEffect, useLayoutEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
+import { useContext } from 'use-context-selector'
 import { useBoolean } from 'ahooks'
 import { XMarkIcon } from '@heroicons/react/20/solid'
 import cn from 'classnames'
 import Link from 'next/link'
 import { groupBy } from 'lodash-es'
 import PreviewItem, { PreviewType } from './preview-item'
+import LanguageSelect from './language-select'
 import s from './index.module.css'
 import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
 import {
|
|||||||
import Toast from '@/app/components/base/toast'
|
import Toast from '@/app/components/base/toast'
|
||||||
import { formatNumber } from '@/utils/format'
|
import { formatNumber } from '@/utils/format'
|
||||||
import type { DataSourceNotionPage } from '@/models/common'
|
import type { DataSourceNotionPage } from '@/models/common'
|
||||||
import { DataSourceType } from '@/models/datasets'
|
import { DataSourceType, DocForm } from '@/models/datasets'
|
||||||
import NotionIcon from '@/app/components/base/notion-icon'
|
import NotionIcon from '@/app/components/base/notion-icon'
|
||||||
import Switch from '@/app/components/base/switch'
|
import Switch from '@/app/components/base/switch'
|
||||||
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
|
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
|
||||||
|
import { XClose } from '@/app/components/base/icons/src/vender/line/general'
|
||||||
import { useDatasetDetailContext } from '@/context/dataset-detail'
|
import { useDatasetDetailContext } from '@/context/dataset-detail'
|
||||||
|
import I18n from '@/context/i18n'
|
||||||
import { IS_CE_EDITION } from '@/config'
|
import { IS_CE_EDITION } from '@/config'
|
||||||
|
|
||||||
type Page = DataSourceNotionPage & { workspace_id: string }
|
type Page = DataSourceNotionPage & { workspace_id: string }
|
||||||
@ -56,10 +60,6 @@ enum IndexingType {
|
|||||||
QUALIFIED = 'high_quality',
|
QUALIFIED = 'high_quality',
|
||||||
ECONOMICAL = 'economy',
|
ECONOMICAL = 'economy',
|
||||||
}
|
}
|
||||||
enum DocForm {
|
|
||||||
TEXT = 'text_model',
|
|
||||||
QA = 'qa_model',
|
|
||||||
}
|
|
||||||
|
|
||||||
const StepTwo = ({
|
const StepTwo = ({
|
||||||
isSetting,
|
isSetting,
|
||||||
@ -78,6 +78,8 @@ const StepTwo = ({
|
|||||||
onCancel,
|
onCancel,
|
||||||
}: StepTwoProps) => {
|
}: StepTwoProps) => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
const { locale } = useContext(I18n)
|
||||||
|
|
||||||
const { mutateDatasetRes } = useDatasetDetailContext()
|
const { mutateDatasetRes } = useDatasetDetailContext()
|
||||||
const scrollRef = useRef<HTMLDivElement>(null)
|
const scrollRef = useRef<HTMLDivElement>(null)
|
||||||
const [scrolled, setScrolled] = useState(false)
|
const [scrolled, setScrolled] = useState(false)
|
||||||
@ -98,6 +100,8 @@ const StepTwo = ({
|
|||||||
const [docForm, setDocForm] = useState<DocForm | string>(
|
const [docForm, setDocForm] = useState<DocForm | string>(
|
||||||
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
|
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
|
||||||
)
|
)
|
||||||
|
const [docLanguage, setDocLanguage] = useState<string>(locale === 'en' ? 'English' : 'Chinese')
|
||||||
|
const [QATipHide, setQATipHide] = useState(false)
|
||||||
const [previewSwitched, setPreviewSwitched] = useState(false)
|
const [previewSwitched, setPreviewSwitched] = useState(false)
|
||||||
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
|
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
|
||||||
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
||||||
@ -230,6 +234,8 @@ const StepTwo = ({
|
|||||||
indexing_technique: getIndexing_technique(),
|
indexing_technique: getIndexing_technique(),
|
||||||
process_rule: getProcessRule(),
|
process_rule: getProcessRule(),
|
||||||
doc_form: docForm,
|
doc_form: docForm,
|
||||||
|
doc_language: docLanguage,
|
||||||
|
dataset_id: datasetId,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (dataSourceType === DataSourceType.NOTION) {
|
if (dataSourceType === DataSourceType.NOTION) {
|
||||||
@ -241,6 +247,8 @@ const StepTwo = ({
|
|||||||
indexing_technique: getIndexing_technique(),
|
indexing_technique: getIndexing_technique(),
|
||||||
process_rule: getProcessRule(),
|
process_rule: getProcessRule(),
|
||||||
doc_form: docForm,
|
doc_form: docForm,
|
||||||
|
doc_language: docLanguage,
|
||||||
|
dataset_id: datasetId,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return params
|
return params
|
||||||
@ -252,6 +260,7 @@ const StepTwo = ({
|
|||||||
params = {
|
params = {
|
||||||
original_document_id: documentDetail?.id,
|
original_document_id: documentDetail?.id,
|
||||||
doc_form: docForm,
|
doc_form: docForm,
|
||||||
|
doc_language: docLanguage,
|
||||||
process_rule: getProcessRule(),
|
process_rule: getProcessRule(),
|
||||||
} as CreateDocumentReq
|
} as CreateDocumentReq
|
||||||
}
|
}
|
||||||
@ -266,6 +275,7 @@ const StepTwo = ({
|
|||||||
indexing_technique: getIndexing_technique(),
|
indexing_technique: getIndexing_technique(),
|
||||||
process_rule: getProcessRule(),
|
process_rule: getProcessRule(),
|
||||||
doc_form: docForm,
|
doc_form: docForm,
|
||||||
|
doc_language: docLanguage,
|
||||||
} as CreateDocumentReq
|
} as CreateDocumentReq
|
||||||
if (dataSourceType === DataSourceType.FILE) {
|
if (dataSourceType === DataSourceType.FILE) {
|
||||||
params.data_source.info_list.file_info_list = {
|
params.data_source.info_list.file_info_list = {
|
||||||
@ -348,6 +358,10 @@ const StepTwo = ({
|
|||||||
setDocForm(DocForm.TEXT)
|
setDocForm(DocForm.TEXT)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const handleSelect = (language: string) => {
|
||||||
|
setDocLanguage(language)
|
||||||
|
}
|
||||||
|
|
||||||
const changeToEconomicalType = () => {
|
const changeToEconomicalType = () => {
|
||||||
if (!hasSetIndexType) {
|
if (!hasSetIndexType) {
|
||||||
setIndexType(IndexingType.ECONOMICAL)
|
setIndexType(IndexingType.ECONOMICAL)
|
||||||
@ -574,13 +588,17 @@ const StepTwo = ({
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{IS_CE_EDITION && indexType === IndexingType.QUALIFIED && (
|
{IS_CE_EDITION && indexType === IndexingType.QUALIFIED && (
|
||||||
<div className='flex justify-between items-center mt-3 px-5 py-4 rounded-xl bg-gray-50 border border-gray-100'>
|
<div className='mt-3 rounded-xl bg-gray-50 border border-gray-100'>
|
||||||
|
<div className='flex justify-between items-center px-5 py-4'>
|
||||||
<div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'>
|
<div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'>
|
||||||
<MessageChatSquare className='w-4 h-4' />
|
<MessageChatSquare className='w-4 h-4' />
|
||||||
</div>
|
</div>
|
||||||
<div className='grow mx-3'>
|
<div className='grow mx-3'>
|
||||||
<div className='mb-[2px] text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div>
|
<div className='mb-[2px] text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div>
|
||||||
<div className='text-[13px] leading-[18px] text-gray-500'>{t('datasetCreation.stepTwo.QATip')}</div>
|
<div className='inline-flex items-center text-[13px] leading-[18px] text-gray-500'>
|
||||||
|
<span className='pr-1'>{t('datasetCreation.stepTwo.QALanguage')}</span>
|
||||||
|
<LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} />
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className='shrink-0'>
|
<div className='shrink-0'>
|
||||||
<Switch
|
<Switch
|
||||||
@ -590,6 +608,13 @@ const StepTwo = ({
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{docForm === DocForm.QA && !QATipHide && (
|
||||||
|
<div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'>
|
||||||
|
{t('datasetCreation.stepTwo.QATip')}
|
||||||
|
<XClose className='w-4 h-4 text-gray-500 cursor-pointer' onClick={() => setQATipHide(true)} />
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
<div className={s.source}>
|
<div className={s.source}>
|
||||||
<div className={s.sourceContent}>
|
<div className={s.sourceContent}>
|
||||||
|
@@ -0,0 +1,38 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import cn from 'classnames'
+import { ChevronDown } from '@/app/components/base/icons/src/vender/line/arrows'
+import Popover from '@/app/components/base/popover'
+
+export type ILanguageSelectProps = {
+  currentLanguage: string
+  onSelect: (language: string) => void
+}
+
+const LanguageSelect: FC<ILanguageSelectProps> = ({
+  currentLanguage,
+  onSelect,
+}) => {
+  return (
+    <Popover
+      manualClose
+      trigger='click'
+      htmlContent={
+        <div className='w-full py-1'>
+          <div className='py-2 px-3 mx-1 flex items-center gap-2 hover:bg-gray-100 rounded-lg cursor-pointer text-gray-700 text-sm' onClick={() => onSelect('English')}>English</div>
+          <div className='py-2 px-3 mx-1 flex items-center gap-2 hover:bg-gray-100 rounded-lg cursor-pointer text-gray-700 text-sm' onClick={() => onSelect('Chinese')}>简体中文</div>
+        </div>
+      }
+      btnElement={
+        <div className='inline-flex items-center'>
+          <span className='pr-[2px] text-xs leading-[18px] font-medium'>{currentLanguage === 'English' ? 'English' : '简体中文'}</span>
+          <ChevronDown className='w-3 h-3 opacity-60' />
+        </div>
+      }
+      btnClassName={open => cn('!border-0 !px-0 !py-0 !bg-inherit !hover:bg-inherit', open ? 'text-blue-600' : 'text-gray-500')}
+      className='!w-[120px] h-fit !z-20 !translate-x-0 !left-[-16px]'
+    />
+  )
+}
+export default React.memo(LanguageSelect)
@@ -0,0 +1,108 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import {
+  useCSVDownloader,
+} from 'react-papaparse'
+import { useTranslation } from 'react-i18next'
+import { useContext } from 'use-context-selector'
+import { Download02 as DownloadIcon } from '@/app/components/base/icons/src/vender/solid/general'
+import { DocForm } from '@/models/datasets'
+import I18n from '@/context/i18n'
+
+const CSV_TEMPLATE_QA_EN = [
+  ['question', 'answer'],
+  ['question1', 'answer1'],
+  ['question2', 'answer2'],
+]
+const CSV_TEMPLATE_QA_CN = [
+  ['问题', '答案'],
+  ['问题 1', '答案 1'],
+  ['问题 2', '答案 2'],
+]
+const CSV_TEMPLATE_EN = [
+  ['segment content'],
+  ['content1'],
+  ['content2'],
+]
+const CSV_TEMPLATE_CN = [
+  ['分段内容'],
+  ['内容 1'],
+  ['内容 2'],
+]
+
+const CSVDownload: FC<{ docForm: DocForm }> = ({ docForm }) => {
+  const { t } = useTranslation()
+  const { locale } = useContext(I18n)
+  const { CSVDownloader, Type } = useCSVDownloader()
+
+  const getTemplate = () => {
+    if (locale === 'en') {
+      if (docForm === DocForm.QA)
+        return CSV_TEMPLATE_QA_EN
+      return CSV_TEMPLATE_EN
+    }
+    if (docForm === DocForm.QA)
+      return CSV_TEMPLATE_QA_CN
+    return CSV_TEMPLATE_CN
+  }
+
+  return (
+    <div className='mt-6'>
+      <div className='text-sm text-gray-900 font-medium'>{t('share.generation.csvStructureTitle')}</div>
+      <div className='mt-2 max-h-[500px] overflow-auto'>
+        {docForm === DocForm.QA && (
+          <table className='table-fixed w-full border-separate border-spacing-0 border border-gray-200 rounded-lg text-xs'>
+            <thead className='text-gray-500'>
+              <tr>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-200'>{t('datasetDocuments.list.batchModal.question')}</td>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-200'>{t('datasetDocuments.list.batchModal.answer')}</td>
+              </tr>
+            </thead>
+            <tbody className='text-gray-700'>
+              <tr>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-100 text-[13px]'>{t('datasetDocuments.list.batchModal.question')} 1</td>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-100 text-[13px]'>{t('datasetDocuments.list.batchModal.answer')} 1</td>
+              </tr>
+              <tr>
+                <td className='h-9 pl-3 pr-2 text-[13px]'>{t('datasetDocuments.list.batchModal.question')} 2</td>
+                <td className='h-9 pl-3 pr-2 text-[13px]'>{t('datasetDocuments.list.batchModal.answer')} 2</td>
+              </tr>
+            </tbody>
+          </table>
+        )}
+        {docForm === DocForm.TEXT && (
+          <table className='table-fixed w-full border-separate border-spacing-0 border border-gray-200 rounded-lg text-xs'>
+            <thead className='text-gray-500'>
+              <tr>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-200'>{t('datasetDocuments.list.batchModal.contentTitle')}</td>
+              </tr>
+            </thead>
+            <tbody className='text-gray-700'>
+              <tr>
+                <td className='h-9 pl-3 pr-2 border-b border-gray-100 text-[13px]'>{t('datasetDocuments.list.batchModal.content')} 1</td>
+              </tr>
+              <tr>
+                <td className='h-9 pl-3 pr-2 text-[13px]'>{t('datasetDocuments.list.batchModal.content')} 2</td>
+              </tr>
+            </tbody>
+          </table>
+        )}
+      </div>
+      <CSVDownloader
+        className="block mt-2 cursor-pointer"
+        type={Type.Link}
+        filename={'template'}
+        bom={true}
+        data={getTemplate()}
+      >
+        <div className='flex items-center h-[18px] space-x-1 text-[#155EEF] text-xs font-medium'>
+          <DownloadIcon className='w-3 h-3 mr-1' />
+          {t('datasetDocuments.list.batchModal.template')}
+        </div>
+      </CSVDownloader>
+    </div>
+  )
+}
+export default React.memo(CSVDownload)
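For reference, the English Q&A template above serializes to a plain two-column CSV. Assuming react-papaparse's default comma delimiter (the bom={true} prop additionally prepends a UTF-8 byte-order mark for spreadsheet compatibility), the downloaded template would contain:

question,answer
question1,answer1
question2,answer2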
@@ -0,0 +1,126 @@
+'use client'
+import type { FC } from 'react'
+import React, { useEffect, useRef, useState } from 'react'
+import cn from 'classnames'
+import { useTranslation } from 'react-i18next'
+import { useContext } from 'use-context-selector'
+import { Csv as CSVIcon } from '@/app/components/base/icons/src/public/files'
+import { ToastContext } from '@/app/components/base/toast'
+import { Trash03 } from '@/app/components/base/icons/src/vender/line/general'
+import Button from '@/app/components/base/button'
+
+export type Props = {
+  file: File | undefined
+  updateFile: (file?: File) => void
+}
+
+const CSVUploader: FC<Props> = ({
+  file,
+  updateFile,
+}) => {
+  const { t } = useTranslation()
+  const { notify } = useContext(ToastContext)
+  const [dragging, setDragging] = useState(false)
+  const dropRef = useRef<HTMLDivElement>(null)
+  const dragRef = useRef<HTMLDivElement>(null)
+  const fileUploader = useRef<HTMLInputElement>(null)
+
+  const handleDragEnter = (e: DragEvent) => {
+    e.preventDefault()
+    e.stopPropagation()
+    e.target !== dragRef.current && setDragging(true)
+  }
+  const handleDragOver = (e: DragEvent) => {
+    e.preventDefault()
+    e.stopPropagation()
+  }
+  const handleDragLeave = (e: DragEvent) => {
+    e.preventDefault()
+    e.stopPropagation()
+    e.target === dragRef.current && setDragging(false)
+  }
+  const handleDrop = (e: DragEvent) => {
+    e.preventDefault()
+    e.stopPropagation()
+    setDragging(false)
+    if (!e.dataTransfer)
+      return
+    const files = [...e.dataTransfer.files]
+    if (files.length > 1) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.count') })
+      return
+    }
+    updateFile(files[0])
+  }
+  const selectHandle = () => {
+    if (fileUploader.current)
+      fileUploader.current.click()
+  }
+  const removeFile = () => {
+    if (fileUploader.current)
+      fileUploader.current.value = ''
+    updateFile()
+  }
+  const fileChangeHandle = (e: React.ChangeEvent<HTMLInputElement>) => {
+    const currentFile = e.target.files?.[0]
+    updateFile(currentFile)
+  }
+
+  useEffect(() => {
+    dropRef.current?.addEventListener('dragenter', handleDragEnter)
+    dropRef.current?.addEventListener('dragover', handleDragOver)
+    dropRef.current?.addEventListener('dragleave', handleDragLeave)
+    dropRef.current?.addEventListener('drop', handleDrop)
+    return () => {
+      dropRef.current?.removeEventListener('dragenter', handleDragEnter)
+      dropRef.current?.removeEventListener('dragover', handleDragOver)
+      dropRef.current?.removeEventListener('dragleave', handleDragLeave)
+      dropRef.current?.removeEventListener('drop', handleDrop)
+    }
+  }, [])
+
+  return (
+    <div className='mt-6'>
+      <input
+        ref={fileUploader}
+        style={{ display: 'none' }}
+        type="file"
+        id="fileUploader"
+        accept='.csv'
+        onChange={fileChangeHandle}
+      />
+      <div ref={dropRef}>
+        {!file && (
+          <div className={cn('flex items-center h-20 rounded-xl bg-gray-50 border border-dashed border-gray-200 text-sm font-normal', dragging && 'bg-[#F5F8FF] border border-[#B2CCFF]')}>
+            <div className='w-full flex items-center justify-center space-x-2'>
+              <CSVIcon className="shrink-0" />
+              <div className='text-gray-500'>
+                {t('datasetDocuments.list.batchModal.csvUploadTitle')}
+                <span className='text-primary-400 cursor-pointer' onClick={selectHandle}>{t('datasetDocuments.list.batchModal.browse')}</span>
+              </div>
+            </div>
+            {dragging && <div ref={dragRef} className='absolute w-full h-full top-0 left-0'/>}
+          </div>
+        )}
+        {file && (
+          <div className={cn('flex items-center h-20 px-6 rounded-xl bg-gray-50 border border-gray-200 text-sm font-normal group', 'hover:bg-[#F5F8FF] hover:border-[#B2CCFF]')}>
+            <CSVIcon className="shrink-0" />
+            <div className='flex ml-2 w-0 grow'>
+              <span className='max-w-[calc(100%_-_30px)] text-ellipsis whitespace-nowrap overflow-hidden text-gray-800'>{file.name.replace(/\.csv$/, '')}</span>
+              <span className='shrink-0 text-gray-500'>.csv</span>
+            </div>
+            <div className='hidden group-hover:flex items-center'>
+              <Button className='!h-8 !px-3 !py-[6px] bg-white !text-[13px] !leading-[18px] text-gray-700' onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.change')}</Button>
+              <div className='mx-2 w-px h-4 bg-gray-200' />
+              <div className='p-2 cursor-pointer' onClick={removeFile}>
+                <Trash03 className='w-4 h-4 text-gray-500' />
+              </div>
+            </div>
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
+
+export default React.memo(CSVUploader)
@@ -0,0 +1,65 @@
+'use client'
+import type { FC } from 'react'
+import React, { useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import CSVUploader from './csv-uploader'
+import CSVDownloader from './csv-downloader'
+import Button from '@/app/components/base/button'
+import Modal from '@/app/components/base/modal'
+import { XClose } from '@/app/components/base/icons/src/vender/line/general'
+import type { DocForm } from '@/models/datasets'
+
+export type IBatchModalProps = {
+  isShow: boolean
+  docForm: DocForm
+  onCancel: () => void
+  onConfirm: (file: File) => void
+}
+
+const BatchModal: FC<IBatchModalProps> = ({
+  isShow,
+  docForm,
+  onCancel,
+  onConfirm,
+}) => {
+  const { t } = useTranslation()
+  const [currentCSV, setCurrentCSV] = useState<File>()
+  const handleFile = (file?: File) => setCurrentCSV(file)
+
+  const handleSend = () => {
+    if (!currentCSV)
+      return
+    onCancel()
+    onConfirm(currentCSV)
+  }
+
+  useEffect(() => {
+    if (!isShow)
+      setCurrentCSV(undefined)
+  }, [isShow])
+
+  return (
+    <Modal isShow={isShow} onClose={() => {}} className='px-8 py-6 !max-w-[520px] !rounded-xl'>
+      <div className='relative pb-1 text-xl font-medium leading-[30px] text-gray-900'>{t('datasetDocuments.list.batchModal.title')}</div>
+      <div className='absolute right-4 top-4 p-2 cursor-pointer' onClick={onCancel}>
+        <XClose className='w-4 h-4 text-gray-500' />
+      </div>
+      <CSVUploader
+        file={currentCSV}
+        updateFile={handleFile}
+      />
+      <CSVDownloader
+        docForm={docForm}
+      />
+      <div className='mt-[28px] pt-6 flex justify-end'>
+        <Button className='mr-2 text-gray-700 text-sm font-medium' onClick={onCancel}>
+          {t('datasetDocuments.list.batchModal.cancel')}
+        </Button>
+        <Button className='text-sm font-medium' type="primary" onClick={handleSend} disabled={!currentCSV}>
+          {t('datasetDocuments.list.batchModal.run')}
+        </Button>
+      </div>
+    </Modal>
+  )
+}
+export default React.memo(BatchModal)
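A minimal sketch of mounting this modal from a parent view. Everything here other than BatchModal's own props is hypothetical; in this commit the real consumer is the document detail page, which wires onConfirm to its runBatch handler:

'use client'
import type { FC } from 'react'
import React, { useState } from 'react'
import BatchModal from './batch-modal'
import { DocForm } from '@/models/datasets'

const BatchImportEntry: FC = () => {
  const [showBatch, setShowBatch] = useState(false)
  // BatchModal closes itself (via onCancel) before invoking onConfirm,
  // so the parent only has to kick off the actual upload here.
  const handleConfirm = (csv: File) => console.log('would upload', csv.name)
  return (
    <>
      <button onClick={() => setShowBatch(true)}>Batch add segments</button>
      <BatchModal
        isShow={showBatch}
        docForm={DocForm.QA}
        onCancel={() => setShowBatch(false)}
        onConfirm={handleConfirm}
      />
    </>
  )
}

export default BatchImportEntry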
@@ -13,6 +13,9 @@ type IInfiniteVirtualListProps = {
   loadNextPage: () => Promise<any> // Callback function responsible for loading the next page of items.
   onClick: (detail: SegmentDetailModel) => void
   onChangeSwitch: (segId: string, enabled: boolean) => Promise<void>
+  onDelete: (segId: string) => Promise<void>
+  archived?: boolean
 }

 const InfiniteVirtualList: FC<IInfiniteVirtualListProps> = ({
@@ -22,6 +25,8 @@ const InfiniteVirtualList: FC<IInfiniteVirtualListProps> = ({
   loadNextPage,
   onClick: onClickCard,
   onChangeSwitch,
+  onDelete,
+  archived,
 }) => {
   // If there are more items to be loaded then add an extra row to hold a loading indicator.
   const itemCount = hasNextPage ? items.length + 1 : items.length
@@ -52,7 +57,9 @@ const InfiniteVirtualList: FC<IInfiniteVirtualListProps> = ({
         detail={segItem}
         onClick={() => onClickCard(segItem)}
         onChangeSwitch={onChangeSwitch}
+        onDelete={onDelete}
         loading={false}
+        archived={archived}
       />
     ))
 }
@@ -1,5 +1,5 @@
 import type { FC } from 'react'
-import React from 'react'
+import React, { useState } from 'react'
 import cn from 'classnames'
 import { ArrowUpRightIcon } from '@heroicons/react/24/outline'
 import { useTranslation } from 'react-i18next'
@@ -7,11 +7,15 @@ import { StatusItem } from '../../list'
 import { DocumentTitle } from '../index'
 import s from './style.module.css'
 import { SegmentIndexTag } from './index'
+import Modal from '@/app/components/base/modal'
+import Button from '@/app/components/base/button'
 import Switch from '@/app/components/base/switch'
 import Divider from '@/app/components/base/divider'
 import Indicator from '@/app/components/header/indicator'
 import { formatNumber } from '@/utils/format'
 import type { SegmentDetailModel } from '@/models/datasets'
+import { AlertCircle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
+import { Trash03 } from '@/app/components/base/icons/src/vender/line/general'

 const ProgressBar: FC<{ percent: number; loading: boolean }> = ({ percent, loading }) => {
   return (
@@ -35,8 +39,10 @@ type ISegmentCardProps = {
   score?: number
   onClick?: () => void
   onChangeSwitch?: (segId: string, enabled: boolean) => Promise<void>
+  onDelete?: (segId: string) => Promise<void>
   scene?: UsageScene
   className?: string
+  archived?: boolean
 }

 const SegmentCard: FC<ISegmentCardProps> = ({
@@ -44,9 +50,11 @@ const SegmentCard: FC<ISegmentCardProps> = ({
   score,
   onClick,
   onChangeSwitch,
+  onDelete,
   loading = true,
   scene = 'doc',
   className = '',
+  archived,
 }) => {
   const { t } = useTranslation()
   const {
@@ -60,6 +68,7 @@ const SegmentCard: FC<ISegmentCardProps> = ({
     answer,
   } = detail as any
   const isDocScene = scene === 'doc'
+  const [showModal, setShowModal] = useState(false)

   const renderContent = () => {
     if (answer) {
@@ -86,7 +95,7 @@ const SegmentCard: FC<ISegmentCardProps> = ({
         s.segWrapper,
         (isDocScene && !enabled) ? 'bg-gray-25' : '',
         'group',
-        !loading ? 'pb-4' : '',
+        !loading ? 'pb-4 hover:pb-[10px]' : '',
         className,
       )}
       onClick={() => onClick?.()}
@@ -116,6 +125,7 @@ const SegmentCard: FC<ISegmentCardProps> = ({
             >
               <Switch
                 size='md'
+                disabled={archived}
                 defaultValue={enabled}
                 onChange={async (val) => {
                   await onChangeSwitch?.(id, val)
@@ -159,10 +169,18 @@ const SegmentCard: FC<ISegmentCardProps> = ({
                 <div className={cn(s.commonIcon, s.targetIcon)} />
                 <div className={s.segDataText}>{formatNumber(hit_count)}</div>
               </div>
-              <div className="flex items-center">
+              <div className="grow flex items-center">
                 <div className={cn(s.commonIcon, s.bezierCurveIcon)} />
                 <div className={s.segDataText}>{index_node_hash}</div>
               </div>
+              {!archived && (
+                <div className='shrink-0 w-6 h-6 flex items-center justify-center rounded-md hover:bg-red-100 hover:text-red-600 cursor-pointer group/delete' onClick={(e) => {
+                  e.stopPropagation()
+                  setShowModal(true)
+                }}>
+                  <Trash03 className='w-[14px] h-[14px] text-gray-500 group-hover/delete:text-red-600' />
+                </div>
+              )}
             </div>
           </>
           : <>
@@ -187,6 +205,26 @@ const SegmentCard: FC<ISegmentCardProps> = ({
           </div>
         </>
       )}
+      {showModal && <Modal isShow={showModal} onClose={() => setShowModal(false)} className={s.delModal} closable>
+        <div>
+          <div className={s.warningWrapper}>
+            <AlertCircle className='w-6 h-6 text-red-600' />
+          </div>
+          <div className='text-xl font-semibold text-gray-900 mb-1'>{t('datasetDocuments.segment.delete')}</div>
+          <div className='flex gap-2 justify-end'>
+            <Button onClick={() => setShowModal(false)}>{t('common.operation.cancel')}</Button>
+            <Button
+              type='warning'
+              onClick={async () => {
+                await onDelete?.(id)
+              }}
+              className='border-red-700 border-[0.5px]'
+            >
+              {t('common.operation.sure')}
+            </Button>
+          </div>
+        </div>
+      </Modal>}
     </div>
   )
 }
@@ -8,6 +8,7 @@ import { debounce, isNil, omitBy } from 'lodash-es'
 import cn from 'classnames'
 import { StatusItem } from '../../list'
 import { DocumentContext } from '../index'
+import { ProcessStatus } from '../segment-add'
 import s from './style.module.css'
 import InfiniteVirtualList from './InfiniteVirtualList'
 import { formatNumber } from '@/utils/format'
@@ -18,7 +19,7 @@ import Input from '@/app/components/base/input'
 import { ToastContext } from '@/app/components/base/toast'
 import type { Item } from '@/app/components/base/select'
 import { SimpleSelect } from '@/app/components/base/select'
-import { disableSegment, enableSegment, fetchSegments, updateSegment } from '@/service/datasets'
+import { deleteSegment, disableSegment, enableSegment, fetchSegments, updateSegment } from '@/service/datasets'
 import type { SegmentDetailModel, SegmentUpdator, SegmentsQuery, SegmentsResponse } from '@/models/datasets'
 import { asyncRunSafe } from '@/utils'
 import type { CommonResponse } from '@/models/common'
@@ -48,12 +49,14 @@ type ISegmentDetailProps = {
   onChangeSwitch?: (segId: string, enabled: boolean) => Promise<void>
   onUpdate: (segmentId: string, q: string, a: string, k: string[]) => void
   onCancel: () => void
+  archived?: boolean
 }
 /**
  * Show all the contents of the segment
  */
 export const SegmentDetail: FC<ISegmentDetailProps> = memo(({
   segInfo,
+  archived,
   onChangeSwitch,
   onUpdate,
   onCancel,
@@ -116,9 +119,7 @@ export const SegmentDetail: FC<ISegmentDetailProps> = memo(({
   return (
     <div className={'flex flex-col relative'}>
       <div className='absolute right-0 top-0 flex items-center h-7'>
-        {
-          isEditing
-            ? (
+        {isEditing && (
           <>
             <Button
               className='mr-2 !h-7 !px-3 !py-[5px] text-xs font-medium text-gray-700 !rounded-md'
@@ -132,15 +133,16 @@ export const SegmentDetail: FC<ISegmentDetailProps> = memo(({
               {t('common.operation.save')}
             </Button>
           </>
-            )
-            : (
+        )}
+        {!isEditing && !archived && (
+          <>
             <div className='group relative flex justify-center items-center w-6 h-6 hover:bg-gray-100 rounded-md cursor-pointer'>
               <div className={cn(s.editTip, 'hidden items-center absolute -top-10 px-3 h-[34px] bg-white rounded-lg whitespace-nowrap text-xs font-semibold text-gray-700 group-hover:flex')}>{t('common.operation.edit')}</div>
               <Edit03 className='w-4 h-4 text-gray-500' onClick={() => setIsEditing(true)} />
             </div>
-            )
-        }
         <div className='mx-3 w-[1px] h-3 bg-gray-200' />
+          </>
+        )}
         <div className='flex justify-center items-center w-6 h-6 cursor-pointer' onClick={onCancel}>
           <XClose className='w-4 h-4 text-gray-500' />
         </div>
@@ -176,6 +178,7 @@ export const SegmentDetail: FC<ISegmentDetailProps> = memo(({
             onChange={async (val) => {
               await onChangeSwitch?.(segInfo?.id || '', val)
             }}
+            disabled={archived}
           />
         </div>
       </div>
@@ -195,13 +198,20 @@ export const splitArray = (arr: any[], size = 3) => {
 type ICompletedProps = {
   showNewSegmentModal: boolean
   onNewSegmentModalChange: (state: boolean) => void
+  importStatus: ProcessStatus | string | undefined
+  archived?: boolean
   // data: Array<{}> // all/part segments
 }
 /**
  * Embedding done, show list of all segments
  * Support search and filter
  */
-const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModalChange }) => {
+const Completed: FC<ICompletedProps> = ({
+  showNewSegmentModal,
+  onNewSegmentModalChange,
+  importStatus,
+  archived,
+}) => {
   const { t } = useTranslation()
   const { notify } = useContext(ToastContext)
   const { datasetId = '', documentId = '', docForm } = useContext(DocumentContext)
@@ -250,11 +260,6 @@ const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModal
     getSegments(false)
   }

-  useEffect(() => {
-    if (lastSegmentsRes !== undefined)
-      getSegments(false)
-  }, [selectedStatus, searchValue])
-
   const onClickCard = (detail: SegmentDetailModel) => {
     setCurrSegment({ segInfo: detail, showModal: true })
   }
@@ -281,6 +286,17 @@ const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModal
     }
   }

+  const onDelete = async (segId: string) => {
+    const [e] = await asyncRunSafe<CommonResponse>(deleteSegment({ datasetId, documentId, segmentId: segId }) as Promise<CommonResponse>)
+    if (!e) {
+      notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
+      resetList()
+    }
+    else {
+      notify({ type: 'error', message: t('common.actionMsg.modificationFailed') })
+    }
+  }
+
   const handleUpdateSegment = async (segmentId: string, question: string, answer: string, keywords: string[]) => {
     const params: SegmentUpdator = { content: '' }
     if (docForm === 'qa_model') {
@@ -321,6 +337,16 @@ const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModal
     setAllSegments([...allSegments])
   }

+  useEffect(() => {
+    if (lastSegmentsRes !== undefined)
+      getSegments(false)
+  }, [selectedStatus, searchValue])
+
+  useEffect(() => {
+    if (importStatus === ProcessStatus.COMPLETED)
+      resetList()
+  }, [importStatus])
+
   return (
     <>
       <div className={s.docSearchWrapper}>
@@ -343,7 +369,9 @@ const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModal
         items={allSegments}
         loadNextPage={getSegments}
         onChangeSwitch={onChangeSwitch}
+        onDelete={onDelete}
         onClick={onClickCard}
+        archived={archived}
       />
       <Modal isShow={currSegment.showModal} onClose={() => {}} className='!max-w-[640px] !overflow-visible'>
         <SegmentDetail
@@ -351,6 +379,7 @@ const Completed: FC<ICompletedProps> = ({ showNewSegmentModal, onNewSegmentModal
           onChangeSwitch={onChangeSwitch}
           onUpdate={handleUpdateSegment}
           onCancel={onCloseModal}
+          archived={archived}
         />
       </Modal>
       <NewSegmentModal
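The asyncRunSafe helper from '@/utils', consumed by the onDelete and onChangeSwitch handlers above, is used as an error-first tuple wrapper so callers can branch on [e] without try/catch. A sketch of the shape its call sites imply — inferred from usage here, not copied from the utility itself:

export const asyncRunSafe = async <T = any>(fn: Promise<T>): Promise<[Error] | [null, T]> => {
  try {
    // Success: error slot is null, the result rides in the second slot.
    return [null, await fn]
  }
  catch (e: any) {
    // Failure: the error occupies the first slot, Node-callback style.
    return [e]
  }
}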
@@ -132,3 +132,24 @@
 .editTip {
   box-shadow: 0px 4px 6px -2px rgba(16, 24, 40, 0.03), 0px 12px 16px -4px rgba(16, 24, 40, 0.08);
 }
+
+.delModal {
+  background: linear-gradient(
+    180deg,
+    rgba(217, 45, 32, 0.05) 0%,
+    rgba(217, 45, 32, 0) 24.02%
+  ),
+  #f9fafb;
+  box-shadow: 0px 20px 24px -4px rgba(16, 24, 40, 0.08),
+    0px 8px 8px -4px rgba(16, 24, 40, 0.03);
+  @apply rounded-2xl p-8;
+}
+
+.warningWrapper {
+  box-shadow: 0px 20px 24px -4px rgba(16, 24, 40, 0.08),
+    0px 8px 8px -4px rgba(16, 24, 40, 0.03);
+  background: rgba(255, 255, 255, 0.9);
+  @apply h-12 w-12 border-[0.5px] border-gray-100 rounded-xl mb-3 flex items-center justify-center;
+}
+
+.warningIcon {
+  @apply w-[22px] h-[22px] fill-current text-red-600;
+}
@@ -3,7 +3,7 @@ import type { FC } from 'react'
 import React, { useState } from 'react'
 import useSWR from 'swr'
 import { ArrowLeftIcon } from '@heroicons/react/24/solid'
-import { createContext } from 'use-context-selector'
+import { createContext, useContext } from 'use-context-selector'
 import { useTranslation } from 'react-i18next'
 import { useRouter } from 'next/navigation'
 import { omit } from 'lodash-es'
@@ -13,19 +13,15 @@ import s from '../style.module.css'
 import Completed from './completed'
 import Embedding from './embedding'
 import Metadata from './metadata'
+import SegmentAdd, { ProcessStatus } from './segment-add'
+import BatchModal from './batch-modal'
 import style from './style.module.css'
 import Divider from '@/app/components/base/divider'
 import Loading from '@/app/components/base/loading'
 import type { MetadataType } from '@/service/datasets'
-import { fetchDocumentDetail } from '@/service/datasets'
+import { checkSegmentBatchImportProgress, fetchDocumentDetail, segmentBatchImport } from '@/service/datasets'
+import { ToastContext } from '@/app/components/base/toast'
+import type { DocForm } from '@/models/datasets'

-export const BackCircleBtn: FC<{ onClick: () => void }> = ({ onClick }) => {
-  return (
-    <div onClick={onClick} className={'rounded-full w-8 h-8 flex justify-center items-center border-gray-100 cursor-pointer border hover:border-gray-300 shadow-lg'}>
-      <ArrowLeftIcon className='text-primary-600 fill-current stroke-current h-4 w-4' />
-    </div>
-  )
-}
-
 export const DocumentContext = createContext<{ datasetId?: string; documentId?: string; docForm: string }>({ docForm: '' })

@@ -51,10 +47,45 @@ type Props = {
 }

 const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
-  const { t } = useTranslation()
   const router = useRouter()
+  const { t } = useTranslation()
+  const { notify } = useContext(ToastContext)
   const [showMetadata, setShowMetadata] = useState(true)
-  const [showNewSegmentModal, setShowNewSegmentModal] = useState(false)
+  const [newSegmentModalVisible, setNewSegmentModalVisible] = useState(false)
+  const [batchModalVisible, setBatchModalVisible] = useState(false)
+  const [importStatus, setImportStatus] = useState<ProcessStatus | string>()
+  const showNewSegmentModal = () => setNewSegmentModalVisible(true)
+  const showBatchModal = () => setBatchModalVisible(true)
+  const hideBatchModal = () => setBatchModalVisible(false)
+  const resetProcessStatus = () => setImportStatus('')
+  const checkProcess = async (jobID: string) => {
+    try {
+      const res = await checkSegmentBatchImportProgress({ jobID })
+      setImportStatus(res.job_status)
+      if (res.job_status === ProcessStatus.WAITING || res.job_status === ProcessStatus.PROCESSING)
+        setTimeout(() => checkProcess(res.job_id), 2500)
+      if (res.job_status === ProcessStatus.ERROR)
+        notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}` })
+    }
+    catch (e: any) {
+      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+    }
+  }
+  const runBatch = async (csv: File) => {
+    const formData = new FormData()
+    formData.append('file', csv)
+    try {
+      const res = await segmentBatchImport({
+        url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,
+        body: formData,
+      })
+      setImportStatus(res.job_status)
+      checkProcess(res.job_id)
+    }
+    catch (e: any) {
+      notify({ type: 'error', message: `${t('datasetDocuments.list.batchModal.runError')}${'message' in e ? `: ${e.message}` : ''}` })
+    }
+  }

   const { data: documentDetail, error, mutate: detailMutate } = useSWR({
     action: 'fetchDocumentDetail',
@@ -91,22 +122,32 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
     <DocumentContext.Provider value={{ datasetId, documentId, docForm: documentDetail?.doc_form || '' }}>
       <div className='flex flex-col h-full'>
         <div className='flex h-16 border-b-gray-100 border-b items-center p-4'>
-          <BackCircleBtn onClick={backToPrev} />
+          <div onClick={backToPrev} className={'rounded-full w-8 h-8 flex justify-center items-center border-gray-100 cursor-pointer border hover:border-gray-300 shadow-[0px_12px_16px_-4px_rgba(16,24,40,0.08),0px_4px_6px_-2px_rgba(16,24,40,0.03)]'}>
+            <ArrowLeftIcon className='text-primary-600 fill-current stroke-current h-4 w-4' />
+          </div>
           <Divider className='!h-4' type='vertical' />
           <DocumentTitle extension={documentDetail?.data_source_info?.upload_file?.extension} name={documentDetail?.name} />
           <StatusItem status={documentDetail?.display_status || 'available'} scene='detail' />
+          {documentDetail && !documentDetail.archived && (
+            <SegmentAdd
+              importStatus={importStatus}
+              clearProcessStatus={resetProcessStatus}
+              showNewSegmentModal={showNewSegmentModal}
+              showBatchModal={showBatchModal}
+            />
+          )}
           <OperationAction
             scene='detail'
             detail={{
               enabled: documentDetail?.enabled || false,
               archived: documentDetail?.archived || false,
               id: documentId,
+              data_source_type: documentDetail?.data_source_type || '',
               doc_form: documentDetail?.doc_form || '',
             }}
             datasetId={datasetId}
             onUpdate={handleOperate}
             className='!w-[216px]'
-            showNewSegmentModal={() => setShowNewSegmentModal(true)}
           />
           <button
             className={cn(style.layoutRightIcon, showMetadata ? style.iconShow : style.iconClose)}
@@ -120,8 +161,10 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
           {embedding
             ? <Embedding detail={documentDetail} detailUpdate={detailMutate} />
             : <Completed
-              showNewSegmentModal={showNewSegmentModal}
-              onNewSegmentModalChange={setShowNewSegmentModal}
+              showNewSegmentModal={newSegmentModalVisible}
+              onNewSegmentModalChange={setNewSegmentModalVisible}
+              importStatus={importStatus}
+              archived={documentDetail?.archived}
             />
           }
         </div>
@@ -132,6 +175,12 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
           onUpdate={metadataMutate}
         />}
       </div>
+      <BatchModal
+        isShow={batchModalVisible}
+        onCancel={hideBatchModal}
+        onConfirm={runBatch}
+        docForm={documentDetail?.doc_form as DocForm}
+      />
     </div>
   </DocumentContext.Provider>
 )
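Both segmentBatchImport and checkSegmentBatchImportProgress are read for job_id and job_status in runBatch and checkProcess above, which implies a shared response shape roughly like the following. This typing is an inference from the call sites, not the canonical service declaration:

import type { ProcessStatus } from './segment-add'

// Assumed response of the segments/batch_import endpoints: the client
// re-polls every 2.5 s while the status is 'waiting' or 'processing'.
type SegmentBatchImportResponse = {
  job_id: string
  job_status: ProcessStatus | string // 'waiting' | 'processing' | 'completed' | 'error'
}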
@@ -0,0 +1,84 @@
+'use client'
+import type { FC } from 'react'
+import React from 'react'
+import { useTranslation } from 'react-i18next'
+import cn from 'classnames'
+import { FilePlus02 } from '@/app/components/base/icons/src/vender/line/files'
+import { Loading02 } from '@/app/components/base/icons/src/vender/line/general'
+import { AlertCircle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
+import { CheckCircle } from '@/app/components/base/icons/src/vender/solid/general'
+import Popover from '@/app/components/base/popover'
+
+export type ISegmentAddProps = {
+  importStatus: ProcessStatus | string | undefined
+  clearProcessStatus: () => void
+  showNewSegmentModal: () => void
+  showBatchModal: () => void
+}
+
+export enum ProcessStatus {
+  WAITING = 'waiting',
+  PROCESSING = 'processing',
+  COMPLETED = 'completed',
+  ERROR = 'error',
+}
+
+const SegmentAdd: FC<ISegmentAddProps> = ({
+  importStatus,
+  clearProcessStatus,
+  showNewSegmentModal,
+  showBatchModal,
+}) => {
+  const { t } = useTranslation()
+
+  if (importStatus) {
+    return (
+      <>
+        {(importStatus === ProcessStatus.WAITING || importStatus === ProcessStatus.PROCESSING) && (
+          <div className='relative overflow-hidden inline-flex items-center mr-2 px-3 py-[6px] text-blue-700 bg-[#F5F8FF] rounded-lg border border-black/5'>
+            {importStatus === ProcessStatus.WAITING && <div className='absolute left-0 top-0 w-3/12 h-full bg-[#D1E0FF] z-0'/>}
+            {importStatus === ProcessStatus.PROCESSING && <div className='absolute left-0 top-0 w-2/3 h-full bg-[#D1E0FF] z-0'/>}
+            <Loading02 className='animate-spin mr-2 w-4 h-4' />
+            <span className='font-medium text-[13px] leading-[18px] z-10'>{t('datasetDocuments.list.batchModal.processing')}</span>
+          </div>
+        )}
+        {importStatus === ProcessStatus.COMPLETED && (
+          <div className='inline-flex items-center mr-2 px-3 py-[6px] text-gray-700 bg-[#F6FEF9] rounded-lg border border-black/5'>
+            <CheckCircle className='mr-2 w-4 h-4 text-[#039855]' />
+            <span className='font-medium text-[13px] leading-[18px]'>{t('datasetDocuments.list.batchModal.completed')}</span>
+            <span className='pl-2 font-medium text-[13px] leading-[18px] text-[#155EEF] cursor-pointer' onClick={clearProcessStatus}>{t('datasetDocuments.list.batchModal.ok')}</span>
+          </div>
+        )}
+        {importStatus === ProcessStatus.ERROR && (
+          <div className='inline-flex items-center mr-2 px-3 py-[6px] text-red-600 bg-red-100 rounded-lg border border-black/5'>
+            <AlertCircle className='mr-2 w-4 h-4 text-[#D92D20]' />
+            <span className='font-medium text-[13px] leading-[18px]'>{t('datasetDocuments.list.batchModal.error')}</span>
+            <span className='pl-2 font-medium text-[13px] leading-[18px] text-[#155EEF] cursor-pointer' onClick={clearProcessStatus}>{t('datasetDocuments.list.batchModal.ok')}</span>
+          </div>
+        )}
+      </>
+    )
+  }
+
+  return (
+    <Popover
+      manualClose
+      trigger='click'
+      htmlContent={
+        <div className='w-full py-1'>
+          <div className='py-2 px-3 mx-1 flex items-center gap-2 hover:bg-gray-100 rounded-lg cursor-pointer text-gray-700 text-sm' onClick={showNewSegmentModal}>{t('datasetDocuments.list.action.add')}</div>
+          <div className='py-2 px-3 mx-1 flex items-center gap-2 hover:bg-gray-100 rounded-lg cursor-pointer text-gray-700 text-sm' onClick={showBatchModal}>{t('datasetDocuments.list.action.batchAdd')}</div>
+        </div>
+      }
+      btnElement={
+        <div className='inline-flex items-center'>
+          <FilePlus02 className='w-4 h-4 text-gray-700' />
+          <span className='pl-1'>{t('datasetDocuments.list.action.addButton')}</span>
+        </div>
+      }
+      btnClassName={open => cn('mr-2 !py-[6px] !text-[13px] !leading-[18px] hover:bg-gray-50 border border-gray-200 hover:border-gray-300 hover:shadow-[0_1px_2px_rgba(16,24,40,0.05)]', open ? '!bg-gray-100 !shadow-none' : '!bg-transparent')}
+      className='!w-[132px] h-fit !z-20 !translate-x-0 !left-0'
+    />
+  )
+}
+export default React.memo(SegmentAdd)
@@ -22,12 +22,12 @@ import type { IndicatorProps } from '@/app/components/header/indicator'
 import Indicator from '@/app/components/header/indicator'
 import { asyncRunSafe } from '@/utils'
 import { formatNumber } from '@/utils/format'
-import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument } from '@/service/datasets'
+import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, unArchiveDocument } from '@/service/datasets'
 import NotionIcon from '@/app/components/base/notion-icon'
 import ProgressBar from '@/app/components/base/progress-bar'
 import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
 import type { CommonResponse } from '@/models/common'
-import { FilePlus02 } from '@/app/components/base/icons/src/vender/line/files'
+import { DotsHorizontal } from '@/app/components/base/icons/src/vender/line/general'

 export const SettingsIcon: FC<{ className?: string }> = ({ className }) => {
   return <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
@@ -86,7 +86,7 @@ export const StatusItem: FC<{
   </div>
 }

-type OperationName = 'delete' | 'archive' | 'enable' | 'disable' | 'sync'
+type OperationName = 'delete' | 'archive' | 'enable' | 'disable' | 'sync' | 'un_archive'

 // operation action for list and detail
 export const OperationAction: FC<{
@@ -101,8 +101,7 @@ export const OperationAction: FC<{
   onUpdate: (operationName?: string) => void
   scene?: 'list' | 'detail'
   className?: string
-  showNewSegmentModal?: () => void
-}> = ({ datasetId, detail, onUpdate, scene = 'list', className = '', showNewSegmentModal }) => {
+}> = ({ datasetId, detail, onUpdate, scene = 'list', className = '' }) => {
   const { id, enabled = false, archived = false, data_source_type } = detail || {}
   const [showModal, setShowModal] = useState(false)
   const { notify } = useContext(ToastContext)
@@ -117,6 +116,9 @@ export const OperationAction: FC<{
     case 'archive':
       opApi = archiveDocument
       break
+    case 'un_archive':
+      opApi = unArchiveDocument
+      break
     case 'enable':
       opApi = enableDocument
       break
@@ -218,10 +220,72 @@ export const OperationAction: FC<{
       <Divider className='!ml-4 !mr-2 !h-3' type='vertical' />
     </>}
     <Popover
-      htmlContent={<Operations />}
+      htmlContent={
+        <div className='w-full py-1'>
+          {!isListScene && <>
+            <div className='flex justify-between items-center mx-4 pt-2'>
+              <span className={cn(s.actionName, 'font-medium')}>
+                {!archived && enabled ? t('datasetDocuments.list.index.enable') : t('datasetDocuments.list.index.disable')}
+              </span>
+              <Tooltip
+                selector={`detail-switch-${id}`}
+                content={t('datasetDocuments.list.action.enableWarning') as string}
+                className='!font-semibold'
+                disabled={!archived}
+              >
+                <div>
+                  <Switch
+                    defaultValue={archived ? false : enabled}
+                    onChange={v => !archived && onOperate(v ? 'enable' : 'disable')}
+                    disabled={archived}
+                    size='md'
+                  />
+                </div>
+              </Tooltip>
+            </div>
+            <div className='mx-4 pb-1 pt-0.5 text-xs text-gray-500'>
+              {!archived && enabled ? t('datasetDocuments.list.index.enableTip') : t('datasetDocuments.list.index.disableTip')}
+            </div>
+            <Divider />
+          </>}
+          {!archived && (
+            <>
+              <div className={s.actionItem} onClick={() => router.push(`/datasets/${datasetId}/documents/${detail.id}/settings`)}>
+                <SettingsIcon />
+                <span className={s.actionName}>{t('datasetDocuments.list.action.settings')}</span>
+              </div>
+              {data_source_type === 'notion_import' && (
+                <div className={s.actionItem} onClick={() => onOperate('sync')}>
+                  <SyncIcon />
+                  <span className={s.actionName}>{t('datasetDocuments.list.action.sync')}</span>
+                </div>
+              )}
+              <Divider className='my-1' />
+            </>
+          )}
+          {!archived && <div className={s.actionItem} onClick={() => onOperate('archive')}>
+            <ArchiveIcon />
+            <span className={s.actionName}>{t('datasetDocuments.list.action.archive')}</span>
+          </div>}
+          {archived && (
+            <div className={s.actionItem} onClick={() => onOperate('un_archive')}>
+              <ArchiveIcon />
+              <span className={s.actionName}>{t('datasetDocuments.list.action.unarchive')}</span>
+            </div>
+          )}
+          <div className={cn(s.actionItem, s.deleteActionItem, 'group')} onClick={() => setShowModal(true)}>
+            <TrashIcon className={'w-4 h-4 stroke-current text-gray-500 stroke-2 group-hover:text-red-500'} />
+            <span className={cn(s.actionName, 'group-hover:text-red-500')}>{t('datasetDocuments.list.action.delete')}</span>
+          </div>
+        </div>
+      }
       trigger='click'
       position='br'
-      btnElement={<div className={cn(s.actionIcon, s.commonIcon)} />}
+      btnElement={
+        <div className={cn(s.commonIcon)}>
+          <DotsHorizontal className='w-4 h-4 text-gray-700' />
+        </div>
+      }
       btnClassName={open => cn(isListScene ? s.actionIconWrapperList : s.actionIconWrapperDetail, open ? '!bg-gray-100 !shadow-none' : '!bg-transparent')}
       className={`!w-[200px] h-fit !z-20 ${className}`}
     />
|
|||||||
import Button from '@/app/components/base/button'
|
import Button from '@/app/components/base/button'
|
||||||
import { fetchDataDetail, updateDatasetSetting } from '@/service/datasets'
|
import { fetchDataDetail, updateDatasetSetting } from '@/service/datasets'
|
||||||
import type { DataSet } from '@/models/datasets'
|
import type { DataSet } from '@/models/datasets'
|
||||||
|
import ModelSelector from '@/app/components/header/account-setting/model-page/model-selector'
|
||||||
|
import type { ProviderEnum } from '@/app/components/header/account-setting/model-page/declarations'
|
||||||
|
import { ModelType } from '@/app/components/header/account-setting/model-page/declarations'
|
||||||
|
import AccountSetting from '@/app/components/header/account-setting'
|
||||||
|
|
||||||
const rowClass = `
|
const rowClass = `
|
||||||
flex justify-between py-4
|
flex justify-between py-4
|
||||||
@ -41,7 +45,7 @@ const Form = ({
|
|||||||
const [description, setDescription] = useState(currentDataset?.description ?? '')
|
const [description, setDescription] = useState(currentDataset?.description ?? '')
|
||||||
const [permission, setPermission] = useState(currentDataset?.permission)
|
const [permission, setPermission] = useState(currentDataset?.permission)
|
||||||
const [indexMethod, setIndexMethod] = useState(currentDataset?.indexing_technique)
|
const [indexMethod, setIndexMethod] = useState(currentDataset?.indexing_technique)
|
||||||
|
const [showSetAPIKeyModal, setShowSetAPIKeyModal] = useState(false)
|
||||||
const handleSave = async () => {
|
const handleSave = async () => {
|
||||||
if (loading)
|
if (loading)
|
||||||
return
|
return
|
||||||
@ -128,6 +132,32 @@ const Form = ({
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div className={rowClass}>
|
||||||
|
<div className={labelClass}>
|
||||||
|
<div>{t('datasetSettings.form.embeddingModel')}</div>
|
||||||
|
</div>
|
||||||
|
<div className='w-[480px]'>
|
||||||
|
{currentDataset && (
|
||||||
|
<>
|
||||||
|
<div className='w-full h-9 rounded-lg bg-gray-100 opacity-60'>
|
||||||
|
<ModelSelector
|
||||||
|
readonly
|
||||||
|
value={{
|
||||||
|
providerName: currentDataset.embedding_model_provider as ProviderEnum,
|
||||||
|
modelName: currentDataset.embedding_model,
|
||||||
|
}}
|
||||||
|
modelType={ModelType.embeddings}
|
||||||
|
onChange={() => {}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div className='mt-2 w-full text-xs leading-6 text-gray-500'>
|
||||||
|
{t('datasetSettings.form.embeddingModelTip')}
|
||||||
|
<span className='text-[#155eef] cursor-pointer' onClick={() => setShowSetAPIKeyModal(true)}>{t('datasetSettings.form.embeddingModelTipLink')}</span>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div className={rowClass}>
|
<div className={rowClass}>
|
||||||
<div className={labelClass} />
|
<div className={labelClass} />
|
||||||
<div className='w-[480px]'>
|
<div className='w-[480px]'>
|
||||||
@ -140,6 +170,11 @@ const Form = ({
|
|||||||
</Button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{showSetAPIKeyModal && (
|
||||||
|
<AccountSetting activeTab="provider" onCancel={async () => {
|
||||||
|
setShowSetAPIKeyModal(false)
|
||||||
|
}} />
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -75,6 +75,7 @@ const translation = {
|
|||||||
economicalTip: 'Use offline vector engines, keyword indexes, etc. to reduce accuracy without spending tokens',
|
economicalTip: 'Use offline vector engines, keyword indexes, etc. to reduce accuracy without spending tokens',
|
||||||
QATitle: 'Segmenting in Question & Answer format',
|
QATitle: 'Segmenting in Question & Answer format',
|
||||||
QATip: 'Enable this option will consume more tokens',
|
QATip: 'Enable this option will consume more tokens',
|
||||||
|
QALanguage: 'Segment using',
|
||||||
emstimateCost: 'Estimation',
|
emstimateCost: 'Estimation',
|
||||||
emstimateSegment: 'Estimated segments',
|
emstimateSegment: 'Estimated segments',
|
||||||
segmentCount: 'segments',
|
segmentCount: 'segments',
|
||||||
|
@ -75,6 +75,7 @@ const translation = {
|
|||||||
economicalTip: '使用离线的向量引擎、关键词索引等方式,降低了准确度但无需花费 Token',
|
economicalTip: '使用离线的向量引擎、关键词索引等方式,降低了准确度但无需花费 Token',
|
||||||
QATitle: '采用 Q&A 分段模式',
|
QATitle: '采用 Q&A 分段模式',
|
||||||
QATip: '开启后将会消耗额外的 token',
|
QATip: '开启后将会消耗额外的 token',
|
||||||
|
QALanguage: '分段使用',
|
||||||
emstimateCost: '执行嵌入预估消耗',
|
emstimateCost: '执行嵌入预估消耗',
|
||||||
emstimateSegment: '预估分段数',
|
emstimateSegment: '预估分段数',
|
||||||
segmentCount: '段',
|
segmentCount: '段',
|
||||||
|
@ -17,8 +17,11 @@ const translation = {
|
|||||||
action: {
|
action: {
|
||||||
uploadFile: 'Upload new file',
|
uploadFile: 'Upload new file',
|
||||||
settings: 'Segment settings',
|
settings: 'Segment settings',
|
||||||
add: 'Add new segment',
|
addButton: 'Add segment',
|
||||||
|
add: 'Add a segment',
|
||||||
|
batchAdd: 'Batch add',
|
||||||
archive: 'Archive',
|
archive: 'Archive',
|
||||||
|
unarchive: 'Unarchive',
|
||||||
delete: 'Delete',
|
delete: 'Delete',
|
||||||
enableWarning: 'Archived file cannot be enabled',
|
enableWarning: 'Archived file cannot be enabled',
|
||||||
sync: 'Sync',
|
sync: 'Sync',
|
||||||
@ -53,6 +56,24 @@ const translation = {
|
|||||||
title: 'Are you sure Delete?',
|
title: 'Are you sure Delete?',
|
||||||
content: 'If you need to resume processing later, you will continue from where you left off',
|
content: 'If you need to resume processing later, you will continue from where you left off',
|
||||||
},
|
},
|
||||||
|
batchModal: {
|
||||||
|
title: 'Batch add segments',
|
||||||
|
csvUploadTitle: 'Drag and drop your CSV file here, or ',
|
||||||
|
browse: 'browse',
|
||||||
|
tip: 'The CSV file must conform to the following structure:',
|
||||||
|
question: 'question',
|
||||||
|
answer: 'answer',
|
||||||
|
contentTitle: 'segment content',
|
||||||
|
content: 'content',
|
||||||
|
template: 'Download the template here',
|
||||||
|
cancel: 'Cancel',
|
||||||
|
run: 'Run Batch',
|
||||||
|
runError: 'Run batch failed',
|
||||||
|
processing: 'In batch processing',
|
||||||
|
completed: 'Import completed',
|
||||||
|
error: 'Import Error',
|
||||||
|
ok: 'OK',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
metadata: {
|
metadata: {
|
||||||
title: 'Metadata',
|
title: 'Metadata',
|
||||||
@@ -321,6 +342,7 @@ const translation = {
     contentEmpty: 'Content can not be empty',
     newTextSegment: 'New Text Segment',
     newQaSegment: 'New Q&A Segment',
+    delete: 'Delete this segment ?',
   },
 }

@@ -17,8 +17,11 @@ const translation = {
   action: {
     uploadFile: '上传新文件',
     settings: '分段设置',
+    addButton: '添加分段',
     add: '添加新分段',
+    batchAdd: '批量添加',
     archive: '归档',
+    unarchive: '撤销归档',
     delete: '删除',
     enableWarning: '归档的文件无法启用',
     sync: '同步',

@@ -53,6 +56,24 @@ const translation = {
       title: '确定删除吗?',
       content: '如果您需要稍后恢复处理,您将从您离开的地方继续',
     },
+    batchModal: {
+      title: '批量添加分段',
+      csvUploadTitle: '将您的 CSV 文件拖放到此处,或',
+      browse: '选择文件',
+      tip: 'CSV 文件必须符合以下结构:',
+      question: '问题',
+      answer: '回答',
+      contentTitle: '分段内容',
+      content: '内容',
+      template: '下载模板',
+      cancel: '取消',
+      run: '导入',
+      runError: '批量导入失败',
+      processing: '批量处理中',
+      completed: '导入完成',
+      error: '导入出错',
+      ok: '确定',
+    },
   },
   metadata: {
     title: '元数据',

@@ -320,6 +341,7 @@ const translation = {
     contentEmpty: '内容不能为空',
     newTextSegment: '新文本分段',
     newQaSegment: '新问答分段',
+    delete: '删除这个分段?',
   },
 }

@@ -15,6 +15,9 @@ const translation = {
   indexMethodHighQualityTip: 'Call OpenAI\'s embedding interface for processing to provide higher accuracy when users query.',
   indexMethodEconomy: 'Economical',
   indexMethodEconomyTip: 'Use offline vector engines, keyword indexes, etc. to reduce accuracy without spending tokens',
+  embeddingModel: 'Embedding Model',
+  embeddingModelTip: 'Change the embedded model, please go to ',
+  embeddingModelTipLink: 'Settings',
   save: 'Save',
 },
}

@@ -15,6 +15,9 @@ const translation = {
   indexMethodHighQualityTip: '调用 OpenAI 的嵌入接口进行处理,以在用户查询时提供更高的准确度',
   indexMethodEconomy: '经济',
   indexMethodEconomyTip: '使用离线的向量引擎、关键词索引等方式,降低了准确度但无需花费 Token',
+  embeddingModel: 'Embedding 模型',
+  embeddingModelTip: '修改 Embedding 模型,请去',
+  embeddingModelTipLink: '设置',
   save: '保存',
 },
}

@@ -16,6 +16,8 @@ const translation = {
   intro4: 'or it ',
   intro5: 'can be created',
   intro6: ' as a standalone ChatGPT index plug-in to publish',
+  unavailable: 'Unavailable',
+  unavailableTip: 'Embedding model is not available, the default embedding model needs to be configured',
 }

 export default translation

@@ -16,6 +16,8 @@ const translation = {
   intro4: '或可以',
   intro5: '创建',
   intro6: '为独立的 ChatGPT 插件发布使用',
+  unavailable: '不可用',
+  unavailableTip: '由于 embedding 模型不可用,需要配置默认 embedding 模型',
 }

 export default translation

@@ -22,6 +22,9 @@ export type DataSet = {
   app_count: number
   document_count: number
   word_count: number
+  embedding_model: string
+  embedding_model_provider: string
+  embedding_available: boolean
 }

 export type CustomFile = File & {

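A sketch of how a consumer might use the three new DataSet fields, for example to treat datasets as read-only once their embedding model is no longer configured (the import path and helper names are illustrative, not from this commit):

import type { DataSet } from '@/models/datasets'  // import path assumed

// A dataset whose embedding model has been removed from the workspace's
// configured providers should not accept new documents.
const canAddDocuments = (dataset: DataSet): boolean => dataset.embedding_available

// Display label combining the two new identity fields,
// e.g. 'openai / text-embedding-ada-002'
const embeddingLabel = (dataset: DataSet): string =>
  `${dataset.embedding_model_provider} / ${dataset.embedding_model}`
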
@@ -184,6 +187,7 @@ export type CreateDocumentReq = {
   original_document_id?: string
   indexing_technique?: string
   doc_form: 'text_model' | 'qa_model'
+  doc_language: string
   data_source: DataSource
   process_rule: ProcessRule
 }

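A sketch of a creation request carrying the new field; Partial is used because the data_source and process_rule shapes are not shown in this hunk, and the literal values are illustrative only:

import type { CreateDocumentReq } from '@/models/datasets'  // import path assumed

const req: Partial<CreateDocumentReq> = {
  indexing_technique: 'high_quality',
  doc_form: 'qa_model',
  // New in this commit: the language used when generating Q&A segments
  doc_language: 'English',
}
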
@@ -390,3 +394,8 @@ export type SegmentUpdator = {
   answer?: string
   keywords?: string[]
 }
+
+export enum DocForm {
+  TEXT = 'text_model',
+  QA = 'qa_model',
+}

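The enum gives callers a named alternative to the 'text_model' / 'qa_model' literals used throughout these types; a minimal usage sketch (import path assumed):

import { DocForm } from '@/models/datasets'  // import path assumed

// String enums compare directly against the raw values stored on documents
const isQaDocument = (docForm: string): boolean => docForm === DocForm.QA
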
@@ -118,6 +118,10 @@ export const archiveDocument: Fetcher<CommonResponse, CommonDocReq> = ({ dataset
   return patch(`/datasets/${datasetId}/documents/${documentId}/status/archive`) as Promise<CommonResponse>
 }

+export const unArchiveDocument: Fetcher<CommonResponse, CommonDocReq> = ({ datasetId, documentId }) => {
+  return patch(`/datasets/${datasetId}/documents/${documentId}/status/un_archive`) as Promise<CommonResponse>
+}
+
 export const enableDocument: Fetcher<CommonResponse, CommonDocReq> = ({ datasetId, documentId }) => {
   return patch(`/datasets/${datasetId}/documents/${documentId}/status/enable`) as Promise<CommonResponse>
 }

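Calling the new fetcher mirrors the existing archiveDocument; a sketch under an assumed import path:

import { unArchiveDocument } from '@/service/datasets'  // import path assumed

// Restores an archived document so it can be enabled again
const restoreDocument = async (datasetId: string, documentId: string) => {
  await unArchiveDocument({ datasetId, documentId })
}
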
@@ -138,10 +142,6 @@ export const modifyDocMetadata: Fetcher<CommonResponse, CommonDocReq & { body: {
   return put(`/datasets/${datasetId}/documents/${documentId}/metadata`, { body }) as Promise<CommonResponse>
 }

-export const getDatasetIndexingStatus: Fetcher<{ data: IndexingStatusResponse[] }, string> = (datasetId) => {
-  return get(`/datasets/${datasetId}/indexing-status`) as Promise<{ data: IndexingStatusResponse[] }>
-}
-
 // apis for segments in a document

 export const fetchSegments: Fetcher<SegmentsResponse, CommonDocReq & { params: SegmentsQuery }> = ({ datasetId, documentId, params }) => {

@@ -164,6 +164,18 @@ export const addSegment: Fetcher<{ data: SegmentDetailModel; doc_form: string },
   return post(`/datasets/${datasetId}/documents/${documentId}/segment`, { body }) as Promise<{ data: SegmentDetailModel; doc_form: string }>
 }

+export const deleteSegment: Fetcher<CommonResponse, { datasetId: string; documentId: string; segmentId: string }> = ({ datasetId, documentId, segmentId }) => {
+  return del(`/datasets/${datasetId}/documents/${documentId}/segments/${segmentId}`) as Promise<CommonResponse>
+}
+
+export const segmentBatchImport: Fetcher<{ job_id: string; job_status: string }, { url: string; body: FormData }> = ({ url, body }) => {
+  return post(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ job_id: string; job_status: string }>
+}
+
+export const checkSegmentBatchImportProgress: Fetcher<{ job_id: string; job_status: string }, { jobID: string }> = ({ jobID }) => {
+  return get(`/datasets/batch_import_status/${jobID}`) as Promise<{ job_id: string; job_status: string }>
+}
+
 // hit testing
 export const hitTesting: Fetcher<HitTestingResponse, { datasetId: string; queryText: string }> = ({ datasetId, queryText }) => {
   return post(`/datasets/${datasetId}/hit-testing`, { body: { query: queryText } }) as Promise<HitTestingResponse>

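Together, segmentBatchImport and checkSegmentBatchImportProgress support an upload-then-poll flow. A sketch: the import paths, the endpoint URL, the FormData field name, and the job_status values are all assumptions for illustration, not confirmed by this diff:

import { checkSegmentBatchImportProgress, segmentBatchImport } from '@/service/datasets'  // paths assumed

const runBatchImport = async (datasetId: string, documentId: string, csvFile: File) => {
  const body = new FormData()
  body.append('file', csvFile)  // field name assumed

  // Kick off the server-side import job
  const { job_id } = await segmentBatchImport({
    url: `/datasets/${datasetId}/documents/${documentId}/segments/batch_import`,  // path assumed
    body,
  })

  // Poll every 2.5s until the job settles ('waiting'/'processing' are assumed states)
  let status = ''
  do {
    await new Promise(resolve => setTimeout(resolve, 2500))
    const res = await checkSegmentBatchImportProgress({ jobID: job_id })
    status = res.job_status
  } while (status === 'waiting' || status === 'processing')

  return status
}
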