diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index e0e4ea341..e3da662be 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1,3 +1,5 @@ +# TODO: Merge this with the webui_app and make it a single app + import json import logging import mimetypes @@ -634,9 +636,23 @@ def save_docs_to_vector_db( metadata: Optional[dict] = None, overwrite: bool = False, split: bool = True, + add: bool = False, ) -> bool: log.info(f"save_docs_to_vector_db {docs} {collection_name}") + # Check if entries with the same hash (metadata.hash) already exist + if metadata and "hash" in metadata: + result = VECTOR_DB_CLIENT.query( + collection_name=collection_name, + filter={"hash": metadata["hash"]}, + ) + + if result: + existing_doc_ids = result.ids[0] + if existing_doc_ids: + log.info(f"Document with hash {metadata['hash']} already exists") + raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) + if split: text_splitter = RecursiveCharacterTextSplitter( chunk_size=app.state.config.CHUNK_SIZE, @@ -659,42 +675,46 @@ def save_docs_to_vector_db( metadata[key] = str(value) try: - if overwrite: - if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): - log.info(f"deleting existing collection {collection_name}") - VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name) - if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): log.info(f"collection {collection_name} already exists") - return True - else: - embedding_function = get_embedding_function( - app.state.config.RAG_EMBEDDING_ENGINE, - app.state.config.RAG_EMBEDDING_MODEL, - app.state.sentence_transformer_ef, - app.state.config.OPENAI_API_KEY, - app.state.config.OPENAI_API_BASE_URL, - app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, - ) - embeddings = embedding_function( - list(map(lambda x: x.replace("\n", " "), texts)) - ) + if overwrite: + 
VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name) + log.info(f"deleting existing collection {collection_name}") - VECTOR_DB_CLIENT.insert( - collection_name=collection_name, - items=[ - { - "id": str(uuid.uuid4()), - "text": text, - "vector": embeddings[idx], - "metadata": metadatas[idx], - } - for idx, text in enumerate(texts) - ], - ) + if add is False: + return True - return True + log.info(f"adding to collection {collection_name}") + embedding_function = get_embedding_function( + app.state.config.RAG_EMBEDDING_ENGINE, + app.state.config.RAG_EMBEDDING_MODEL, + app.state.sentence_transformer_ef, + app.state.config.OPENAI_API_KEY, + app.state.config.OPENAI_API_BASE_URL, + app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, + ) + + embeddings = embedding_function( + list(map(lambda x: x.replace("\n", " "), texts)) + ) + + items = [ + { + "id": str(uuid.uuid4()), + "text": text, + "vector": embeddings[idx], + "metadata": metadatas[idx], + } + for idx, text in enumerate(texts) + ] + + VECTOR_DB_CLIENT.insert( + collection_name=collection_name, + items=items, + ) + + return True except Exception as e: log.exception(e) return False @@ -702,6 +722,7 @@ def save_docs_to_vector_db( class ProcessFileForm(BaseModel): file_id: str + content: Optional[str] = None collection_name: Optional[str] = None @@ -712,42 +733,91 @@ def process_file( ): try: file = Files.get_file_by_id(form_data.file_id) - file_path = file.meta.get("path", f"{UPLOAD_DIR}/{file.filename}") collection_name = form_data.collection_name if collection_name is None: - with open(file_path, "rb") as f: - collection_name = calculate_sha256(f)[:63] + collection_name = f"file-{file.id}" loader = Loader( engine=app.state.config.CONTENT_EXTRACTION_ENGINE, TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) - docs = loader.load(file.filename, file.meta.get("content_type"), file_path) - text_content = " ".join([doc.page_content for doc in docs]) - 
log.debug(f"text_content: {text_content}") - Files.update_files_metadata_by_id( - form_data.file_id, - { - "content": { - "text": text_content, - } - }, + if form_data.content: + docs = [ + Document( + page_content=form_data.content, + metadata={ + "name": file.meta.get("name", file.filename), + "created_by": file.user_id, + **file.meta, + }, + ) + ] + + text_content = form_data.content + elif file.data.get("content", None): + docs = [ + Document( + page_content=file.data.get("content", ""), + metadata={ + "name": file.meta.get("name", file.filename), + "created_by": file.user_id, + **file.meta, + }, + ) + ] + text_content = file.data.get("content", "") + else: + file_path = file.meta.get("path", None) + if file_path: + docs = loader.load( + file.filename, file.meta.get("content_type"), file_path + ) + else: + docs = [ + Document( + page_content=file.data.get("content", ""), + metadata={ + "name": file.filename, + "created_by": file.user_id, + **file.meta, + }, + ) + ] + + text_content = " ".join([doc.page_content for doc in docs]) + + log.debug(f"text_content: {text_content}") + Files.update_file_data_by_id( + file.id, + {"content": text_content}, ) + hash = calculate_sha256_string(text_content) + Files.update_file_hash_by_id(file.id, hash) + try: result = save_docs_to_vector_db( - docs, - collection_name, - { - "file_id": form_data.file_id, + docs=docs, + collection_name=collection_name, + metadata={ + "file_id": file.id, "name": file.meta.get("name", file.filename), + "hash": hash, }, + add=(True if form_data.collection_name else False), ) if result: + Files.update_file_metadata_by_id( + file.id, + { + "collection_name": collection_name, + }, + ) + return { "status": True, "collection_name": collection_name, @@ -755,10 +825,7 @@ def process_file( "content": text_content, } except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=e, - ) + raise e except Exception as e: log.exception(e) if "No pandoc was found" in 
str(e): @@ -769,7 +836,7 @@ def process_file( else: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), + detail=str(e), ) @@ -1183,6 +1250,30 @@ def query_collection_handler( #################### +class DeleteForm(BaseModel): + collection_name: str + file_id: str + + +@app.post("/delete") +def delete_entries_from_collection(form_data: DeleteForm, user=Depends(get_admin_user)): + try: + if VECTOR_DB_CLIENT.has_collection(collection_name=form_data.collection_name): + file = Files.get_file_by_id(form_data.file_id) + hash = file.hash + + VECTOR_DB_CLIENT.delete( + collection_name=form_data.collection_name, + filter={"hash": hash}, + ) + return {"status": True} + else: + return {"status": False} + except Exception as e: + log.exception(e) + return {"status": False} + + @app.post("/reset/db") def reset_vector_db(user=Depends(get_admin_user)): VECTOR_DB_CLIENT.reset() diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index 12c30edbb..b796f0a75 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -319,17 +319,25 @@ def get_rag_context( for file in files: if file.get("context") == "full": context = { - "documents": [[file.get("file").get("content")]], + "documents": [[file.get("file").get("data", {}).get("content")]], "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], } else: context = None - collection_names = ( - file["collection_names"] - if file["type"] == "collection" - else [file["collection_name"]] if file["collection_name"] else [] - ) + collection_names = [] + if file.get("type") == "collection": + if file.get("legacy"): + collection_names = file.get("collection_names", []) + else: + collection_names.append(file["id"]) + elif file.get("collection_name"): + collection_names.append(file["collection_name"]) + elif file.get("id"): + if file.get("legacy"): + 
collection_names.append(f"{file['id']}") + else: + collection_names.append(f"file-{file['id']}") collection_names = set(collection_names).difference(extracted_collections) if not collection_names: diff --git a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py index fe065f868..00b4af441 100644 --- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py @@ -49,22 +49,52 @@ class ChromaClient: self, collection_name: str, vectors: list[list[float | int]], limit: int ) -> Optional[SearchResult]: # Search for the nearest neighbor items based on the vectors and return 'limit' number of results. - collection = self.client.get_collection(name=collection_name) - if collection: - result = collection.query( - query_embeddings=vectors, - n_results=limit, - ) + try: + collection = self.client.get_collection(name=collection_name) + if collection: + result = collection.query( + query_embeddings=vectors, + n_results=limit, + ) - return SearchResult( - **{ - "ids": result["ids"], - "distances": result["distances"], - "documents": result["documents"], - "metadatas": result["metadatas"], - } - ) - return None + return SearchResult( + **{ + "ids": result["ids"], + "distances": result["distances"], + "documents": result["documents"], + "metadatas": result["metadatas"], + } + ) + return None + except Exception as e: + return None + + def query( + self, collection_name: str, filter: dict, limit: int = 2 + ) -> Optional[GetResult]: + # Query the items from the collection based on the filter. 
+ + try: + collection = self.client.get_collection(name=collection_name) + if collection: + result = collection.get( + where=filter, + limit=limit, + ) + + print(result) + + return GetResult( + **{ + "ids": [result["ids"]], + "documents": [result["documents"]], + "metadatas": [result["metadatas"]], + } + ) + return None + except Exception as e: + print(e) + return None def get(self, collection_name: str) -> Optional[GetResult]: # Get all the items in the collection. @@ -111,11 +141,19 @@ class ChromaClient: ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas ) - def delete(self, collection_name: str, ids: list[str]): + def delete( + self, + collection_name: str, + ids: Optional[list[str]] = None, + filter: Optional[dict] = None, + ): # Delete the items from the collection based on the ids. collection = self.client.get_collection(name=collection_name) if collection: - collection.delete(ids=ids) + if ids: + collection.delete(ids=ids) + elif filter: + collection.delete(where=filter) def reset(self): # Resets the database. This will delete all collections and item entries. diff --git a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py index 77300acf2..b5bbb24b3 100644 --- a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py @@ -135,6 +135,25 @@ class MilvusClient: return self._result_to_search_result(result) + def query( + self, collection_name: str, filter: dict, limit: int = 1 + ) -> Optional[GetResult]: + # Query the items from the collection based on the filter. 
filter_string = " && ".join( + [ + f'metadata["{key}"] == {json.dumps(value)}' + for key, value in filter.items() + ] + ) + + result = self.client.query( + collection_name=f"{self.collection_prefix}_{collection_name}", + filter=filter_string, + limit=limit, + ) + + return self._result_to_get_result([result]) + def get(self, collection_name: str) -> Optional[GetResult]: # Get all the items in the collection. result = self.client.query( @@ -187,13 +206,32 @@ class MilvusClient: ], ) - def delete(self, collection_name: str, ids: list[str]): + def delete( + self, + collection_name: str, + ids: Optional[list[str]] = None, + filter: Optional[dict] = None, + ): # Delete the items from the collection based on the ids. - return self.client.delete( - collection_name=f"{self.collection_prefix}_{collection_name}", - ids=ids, - ) + if ids: + return self.client.delete( + collection_name=f"{self.collection_prefix}_{collection_name}", + ids=ids, + ) + elif filter: + # Convert the filter dictionary to a boolean expression on the JSON metadata field. + filter_string = " && ".join( + [ + f'metadata["{key}"] == {json.dumps(value)}' + for key, value in filter.items() + ] + ) + + return self.client.delete( + collection_name=f"{self.collection_prefix}_{collection_name}", + filter=filter_string, + ) def reset(self): # Resets the database. This will delete all collections and item entries. 
diff --git a/backend/open_webui/apps/webui/main.py b/backend/open_webui/apps/webui/main.py index 6c6f197dd..1210ac7c3 100644 --- a/backend/open_webui/apps/webui/main.py +++ b/backend/open_webui/apps/webui/main.py @@ -10,11 +10,11 @@ from open_webui.apps.webui.routers import ( auths, chats, configs, - documents, files, functions, memories, models, + knowledge, prompts, tools, users, @@ -111,15 +111,15 @@ app.include_router(auths.router, prefix="/auths", tags=["auths"]) app.include_router(users.router, prefix="/users", tags=["users"]) app.include_router(chats.router, prefix="/chats", tags=["chats"]) -app.include_router(documents.router, prefix="/documents", tags=["documents"]) app.include_router(models.router, prefix="/models", tags=["models"]) +app.include_router(knowledge.router, prefix="/knowledge", tags=["knowledge"]) app.include_router(prompts.router, prefix="/prompts", tags=["prompts"]) -app.include_router(memories.router, prefix="/memories", tags=["memories"]) app.include_router(files.router, prefix="/files", tags=["files"]) app.include_router(tools.router, prefix="/tools", tags=["tools"]) app.include_router(functions.router, prefix="/functions", tags=["functions"]) +app.include_router(memories.router, prefix="/memories", tags=["memories"]) app.include_router(utils.router, prefix="/utils", tags=["utils"]) diff --git a/backend/open_webui/apps/webui/models/files.py b/backend/open_webui/apps/webui/models/files.py index cf572ac78..025c17159 100644 --- a/backend/open_webui/apps/webui/models/files.py +++ b/backend/open_webui/apps/webui/models/files.py @@ -5,7 +5,7 @@ from typing import Optional from open_webui.apps.webui.internal.db import Base, JSONField, get_db from open_webui.env import SRC_LOG_LEVELS from pydantic import BaseModel, ConfigDict -from sqlalchemy import BigInteger, Column, String, Text +from sqlalchemy import BigInteger, Column, String, Text, JSON log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MODELS"]) @@ -20,19 +20,29 @@ class 
File(Base): id = Column(String, primary_key=True) user_id = Column(String) + hash = Column(Text, nullable=True) + filename = Column(Text) + data = Column(JSON, nullable=True) meta = Column(JSONField) + created_at = Column(BigInteger) + updated_at = Column(BigInteger) class FileModel(BaseModel): + model_config = ConfigDict(from_attributes=True) + id: str user_id: str - filename: str - meta: dict - created_at: int # timestamp in epoch + hash: Optional[str] = None - model_config = ConfigDict(from_attributes=True) + filename: str + data: Optional[dict] = None + meta: dict + + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch #################### @@ -43,14 +53,21 @@ class FileModel(BaseModel): class FileModelResponse(BaseModel): id: str user_id: str + hash: Optional[str] = None + filename: str + data: Optional[dict] = None meta: dict + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch class FileForm(BaseModel): id: str + hash: Optional[str] = None filename: str + data: dict = {} meta: dict = {} @@ -62,6 +79,7 @@ class FilesTable: **form_data.model_dump(), "user_id": user_id, "created_at": int(time.time()), + "updated_at": int(time.time()), } ) @@ -90,6 +108,13 @@ class FilesTable: with get_db() as db: return [FileModel.model_validate(file) for file in db.query(File).all()] + def get_files_by_ids(self, ids: list[str]) -> list[FileModel]: + with get_db() as db: + return [ + FileModel.model_validate(file) + for file in db.query(File).filter(File.id.in_(ids)).all() + ] + def get_files_by_user_id(self, user_id: str) -> list[FileModel]: with get_db() as db: return [ @@ -97,17 +122,38 @@ class FilesTable: for file in db.query(File).filter_by(user_id=user_id).all() ] - def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: + def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]: with get_db() as db: try: file = db.query(File).filter_by(id=id).first() - file.meta = {**file.meta, 
**meta} + file.hash = hash db.commit() return FileModel.model_validate(file) except Exception: return None + def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.data = {**(file.data if file.data else {}), **data} + db.commit() + return FileModel.model_validate(file) + except Exception as e: + + return None + + def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.meta = {**(file.meta if file.meta else {}), **meta} + db.commit() + return FileModel.model_validate(file) + except Exception: + return None + def delete_file_by_id(self, id: str) -> bool: with get_db() as db: try: diff --git a/backend/open_webui/apps/webui/models/knowledge.py b/backend/open_webui/apps/webui/models/knowledge.py new file mode 100644 index 000000000..698cccda0 --- /dev/null +++ b/backend/open_webui/apps/webui/models/knowledge.py @@ -0,0 +1,152 @@ +import json +import logging +import time +from typing import Optional +import uuid + +from open_webui.apps.webui.internal.db import Base, get_db +from open_webui.env import SRC_LOG_LEVELS +from pydantic import BaseModel, ConfigDict +from sqlalchemy import BigInteger, Column, String, Text, JSON + + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["MODELS"]) + +#################### +# Knowledge DB Schema +#################### + + +class Knowledge(Base): + __tablename__ = "knowledge" + + id = Column(Text, unique=True, primary_key=True) + user_id = Column(Text) + + name = Column(Text) + description = Column(Text) + + data = Column(JSON, nullable=True) + meta = Column(JSON, nullable=True) + + created_at = Column(BigInteger) + updated_at = Column(BigInteger) + + +class KnowledgeModel(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: str + user_id: str + + name: str + description: str + + data: 
Optional[dict] = None + meta: Optional[dict] = None + + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch + + +#################### +# Forms +#################### + + +class KnowledgeResponse(BaseModel): + id: str + name: str + description: str + data: Optional[dict] = None + meta: Optional[dict] = None + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch + + +class KnowledgeForm(BaseModel): + name: str + description: str + data: Optional[dict] = None + + +class KnowledgeUpdateForm(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + data: Optional[dict] = None + + +class KnowledgeTable: + def insert_new_knowledge( + self, user_id: str, form_data: KnowledgeForm + ) -> Optional[KnowledgeModel]: + with get_db() as db: + knowledge = KnowledgeModel( + **{ + **form_data.model_dump(), + "id": str(uuid.uuid4()), + "user_id": user_id, + "created_at": int(time.time()), + "updated_at": int(time.time()), + } + ) + + try: + result = Knowledge(**knowledge.model_dump()) + db.add(result) + db.commit() + db.refresh(result) + if result: + return KnowledgeModel.model_validate(result) + else: + return None + except Exception: + return None + + def get_knowledge_items(self) -> list[KnowledgeModel]: + with get_db() as db: + return [ + KnowledgeModel.model_validate(knowledge) + for knowledge in db.query(Knowledge) + .order_by(Knowledge.updated_at.desc()) + .all() + ] + + def get_knowledge_by_id(self, id: str) -> Optional[KnowledgeModel]: + try: + with get_db() as db: + knowledge = db.query(Knowledge).filter_by(id=id).first() + return KnowledgeModel.model_validate(knowledge) if knowledge else None + except Exception: + return None + + def update_knowledge_by_id( + self, id: str, form_data: KnowledgeUpdateForm, overwrite: bool = False + ) -> Optional[KnowledgeModel]: + try: + with get_db() as db: + knowledge = self.get_knowledge_by_id(id=id) + db.query(Knowledge).filter_by(id=id).update( + { + 
**form_data.model_dump(exclude_none=True), + "updated_at": int(time.time()), + } + ) + db.commit() + return self.get_knowledge_by_id(id=id) + except Exception as e: + log.exception(e) + return None + + def delete_knowledge_by_id(self, id: str) -> bool: + try: + with get_db() as db: + db.query(Knowledge).filter_by(id=id).delete() + db.commit() + return True + except Exception: + return False + + +Knowledges = KnowledgeTable() diff --git a/backend/open_webui/apps/webui/routers/files.py b/backend/open_webui/apps/webui/routers/files.py index f46a7992d..4d688b1ba 100644 --- a/backend/open_webui/apps/webui/routers/files.py +++ b/backend/open_webui/apps/webui/routers/files.py @@ -4,13 +4,18 @@ import shutil import uuid from pathlib import Path from typing import Optional +from pydantic import BaseModel from open_webui.apps.webui.models.files import FileForm, FileModel, Files +from open_webui.apps.retrieval.main import process_file, ProcessFileForm + from open_webui.config import UPLOAD_DIR from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS + + from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status -from fastapi.responses import FileResponse +from fastapi.responses import FileResponse, StreamingResponse from open_webui.utils.utils import get_admin_user, get_verified_user log = logging.getLogger(__name__) @@ -58,6 +63,13 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)): ), ) + try: + process_file(ProcessFileForm(file_id=id)) + file = Files.get_file_by_id(id=id) + except Exception as e: + log.exception(e) + log.error(f"Error processing file: {file.id}") + if file: return file else: @@ -143,6 +155,55 @@ async def get_file_by_id(id: str, user=Depends(get_verified_user)): ) +############################ +# Get File Data Content By Id +############################ + + +@router.get("/{id}/data/content") +async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)): 
+ file = Files.get_file_by_id(id) + + if file and (file.user_id == user.id or user.role == "admin"): + return {"content": file.data.get("content", "")} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# Update File Data Content By Id +############################ + + +class ContentForm(BaseModel): + content: str + + +@router.post("/{id}/data/content/update") +async def update_file_data_content_by_id( + id: str, form_data: ContentForm, user=Depends(get_verified_user) +): + file = Files.get_file_by_id(id) + + if file and (file.user_id == user.id or user.role == "admin"): + try: + process_file(ProcessFileForm(file_id=id, content=form_data.content)) + file = Files.get_file_by_id(id=id) + except Exception as e: + log.exception(e) + log.error(f"Error processing file: {file.id}") + + return {"content": file.data.get("content", "")} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + ############################ # Get File Content By Id ############################ @@ -171,34 +232,37 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)): ) -@router.get("/{id}/content/text") -async def get_file_text_content_by_id(id: str, user=Depends(get_verified_user)): - file = Files.get_file_by_id(id) - - if file and (file.user_id == user.id or user.role == "admin"): - return {"text": file.meta.get("content", {}).get("text", None)} - else: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=ERROR_MESSAGES.NOT_FOUND, - ) - - @router.get("/{id}/content/{file_name}", response_model=Optional[FileModel]) async def get_file_content_by_id(id: str, user=Depends(get_verified_user)): file = Files.get_file_by_id(id) if file and (file.user_id == user.id or user.role == "admin"): - file_path = Path(file.meta["path"]) + file_path = file.meta.get("path") + if file_path: + file_path = Path(file_path) 
- # Check if the file already exists in the cache - if file_path.is_file(): - print(f"file_path: {file_path}") - return FileResponse(file_path) + # Check if the file already exists in the cache + if file_path.is_file(): + print(f"file_path: {file_path}") + return FileResponse(file_path) + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) else: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=ERROR_MESSAGES.NOT_FOUND, + # File path doesn't exist, return the content as .txt if possible + file_content = file.data.get("content", "") + file_name = file.filename + + # Create a generator that encodes the file content + def generator(): + yield file_content.encode("utf-8") + + return StreamingResponse( + generator(), + media_type="text/plain", + headers={"Content-Disposition": f"attachment; filename={file_name}"}, ) else: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND, ) diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py new file mode 100644 index 000000000..821f02ed1 --- /dev/null +++ b/backend/open_webui/apps/webui/routers/knowledge.py @@ -0,0 +1,320 @@ +import json +from typing import Optional, Union +from pydantic import BaseModel +from fastapi import APIRouter, Depends, HTTPException, status + + +from open_webui.apps.webui.models.knowledge import ( + Knowledges, + KnowledgeUpdateForm, + KnowledgeForm, + KnowledgeResponse, +) +from open_webui.apps.webui.models.files import Files, FileModel +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.main import process_file, ProcessFileForm + + +from open_webui.constants import ERROR_MESSAGES +from open_webui.utils.utils import get_admin_user, get_verified_user + + +router = APIRouter() + +############################ +# GetKnowledgeItems +############################ + + +@router.get( + "/", response_model=Optional[Union[list[KnowledgeResponse], 
KnowledgeResponse]] +) +async def get_knowledge_items( + id: Optional[str] = None, user=Depends(get_verified_user) +): + if id: + knowledge = Knowledges.get_knowledge_by_id(id=id) + + if knowledge: + return knowledge + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + else: + return [ + KnowledgeResponse(**knowledge.model_dump()) + for knowledge in Knowledges.get_knowledge_items() + ] + + +############################ +# CreateNewKnowledge +############################ + + +@router.post("/create", response_model=Optional[KnowledgeResponse]) +async def create_new_knowledge(form_data: KnowledgeForm, user=Depends(get_admin_user)): + knowledge = Knowledges.insert_new_knowledge(user.id, form_data) + + if knowledge: + return knowledge + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_EXISTS, + ) + + +############################ +# GetKnowledgeById +############################ + + +class KnowledgeFilesResponse(KnowledgeResponse): + files: list[FileModel] + + +@router.get("/{id}", response_model=Optional[KnowledgeFilesResponse]) +async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)): + knowledge = Knowledges.get_knowledge_by_id(id=id) + + if knowledge: + file_ids = knowledge.data.get("file_ids", []) if knowledge.data else [] + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# UpdateKnowledgeById +############################ + + +@router.post("/{id}/update", response_model=Optional[KnowledgeFilesResponse]) +async def update_knowledge_by_id( + id: str, + form_data: KnowledgeUpdateForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.update_knowledge_by_id(id=id, form_data=form_data) + + if knowledge: + 
file_ids = knowledge.data.get("file_ids", []) if knowledge.data else [] + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.ID_TAKEN, + ) + + +############################ +# AddFileToKnowledge +############################ + + +class KnowledgeFileIdForm(BaseModel): + file_id: str + + +@router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse]) +def add_file_to_knowledge_by_id( + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + if not file.data: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_NOT_PROCESSED, + ) + + # Add content to the vector database + try: + process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + if form_data.file_id not in file_ids: + file_ids.append(form_data.file_id) + data["file_ids"] = file_ids + + knowledge = Knowledges.update_knowledge_by_id( + id=id, form_data=KnowledgeUpdateForm(data=data) + ) + + if knowledge: + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("knowledge"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("file_id"), + ) + else: + raise HTTPException( + 
status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +@router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) +def update_file_from_knowledge_by_id( + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + # Remove content from the vector database + VECTOR_DB_CLIENT.delete( + collection_name=knowledge.id, filter={"file_id": form_data.file_id} + ) + + # Add content to the vector database + try: + process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# RemoveFileFromKnowledge +############################ + + +@router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse]) +def remove_file_from_knowledge_by_id( + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + # Remove content from the vector database + VECTOR_DB_CLIENT.delete( + collection_name=knowledge.id, filter={"file_id": form_data.file_id} + ) + + result = VECTOR_DB_CLIENT.query( + collection_name=knowledge.id, + filter={"file_id": form_data.file_id}, + 
) + + Files.delete_file_by_id(form_data.file_id) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + if form_data.file_id in file_ids: + file_ids.remove(form_data.file_id) + data["file_ids"] = file_ids + + knowledge = Knowledges.update_knowledge_by_id( + id=id, form_data=KnowledgeUpdateForm(data=data) + ) + + if knowledge: + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("knowledge"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("file_id"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# DeleteKnowledgeById +############################ + + +@router.delete("/{id}/delete", response_model=bool) +async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)): + VECTOR_DB_CLIENT.delete_collection(collection_name=id) + result = Knowledges.delete_knowledge_by_id(id=id) + return result diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 3c28ab01f..bd02c917c 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -56,9 +56,6 @@ def run_migrations(): print(f"Error: {e}") -run_migrations() - - class Config(Base): __tablename__ = "config" diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py index 98dbe32b2..37461402b 100644 --- a/backend/open_webui/constants.py +++ b/backend/open_webui/constants.py @@ -34,8 +34,8 @@ class ERROR_MESSAGES(str, Enum): ID_TAKEN = "Uh-oh! This id is already registered. Please choose another id string." MODEL_ID_TAKEN = "Uh-oh! This model id is already registered. Please choose another model id string." - NAME_TAG_TAKEN = "Uh-oh! This name tag is already registered. 
Please choose another name tag string." + INVALID_TOKEN = ( "Your session has expired or the token is invalid. Please sign in again." ) @@ -94,6 +94,11 @@ class ERROR_MESSAGES(str, Enum): lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}." ) + DUPLICATE_CONTENT = ( + "Duplicate content detected. Please provide unique content to proceed." + ) + FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding." + class TASKS(str, Enum): def __str__(self) -> str: diff --git a/backend/open_webui/migrations/scripts/revision.py b/backend/open_webui/migrations/scripts/revision.py deleted file mode 100644 index 32ebc9e35..000000000 --- a/backend/open_webui/migrations/scripts/revision.py +++ /dev/null @@ -1,19 +0,0 @@ -from alembic import command -from alembic.config import Config - -from open_webui.env import OPEN_WEBUI_DIR - -alembic_cfg = Config(OPEN_WEBUI_DIR / "alembic.ini") - -# Set the script location dynamically -migrations_path = OPEN_WEBUI_DIR / "migrations" -alembic_cfg.set_main_option("script_location", str(migrations_path)) - - -def revision(message: str) -> None: - command.revision(alembic_cfg, message=message, autogenerate=False) - - -if __name__ == "__main__": - input_message = input("Enter the revision message: ") - revision(input_message) diff --git a/backend/open_webui/migrations/util.py b/backend/open_webui/migrations/util.py index 401bb94d0..955066602 100644 --- a/backend/open_webui/migrations/util.py +++ b/backend/open_webui/migrations/util.py @@ -7,3 +7,9 @@ def get_existing_tables(): inspector = Inspector.from_engine(con) tables = set(inspector.get_table_names()) return tables + + +def get_revision_id(): + import uuid + + return str(uuid.uuid4()).replace("-", "")[:12] diff --git a/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py 
b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py new file mode 100644 index 000000000..881e6ae64 --- /dev/null +++ b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py @@ -0,0 +1,80 @@ +"""Add knowledge table + +Revision ID: 6a39f3d8e55c +Revises: c0fbf31ca0db +Create Date: 2024-10-01 14:02:35.241684 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.sql import table, column, select +import json + + +revision = "6a39f3d8e55c" +down_revision = "c0fbf31ca0db" +branch_labels = None +depends_on = None + + +def upgrade(): + # Creating the 'knowledge' table + print("Creating knowledge table") + knowledge_table = op.create_table( + "knowledge", + sa.Column("id", sa.Text(), primary_key=True), + sa.Column("user_id", sa.Text(), nullable=False), + sa.Column("name", sa.Text(), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("data", sa.JSON(), nullable=True), + sa.Column("meta", sa.JSON(), nullable=True), + sa.Column("created_at", sa.BigInteger(), nullable=False), + sa.Column("updated_at", sa.BigInteger(), nullable=True), + ) + + print("Migrating data from document table to knowledge table") + # Representation of the existing 'document' table + document_table = table( + "document", + column("collection_name", sa.String()), + column("user_id", sa.String()), + column("name", sa.String()), + column("title", sa.Text()), + column("content", sa.Text()), + column("timestamp", sa.BigInteger()), + ) + + # Select all from existing document table + documents = op.get_bind().execute( + select( + document_table.c.collection_name, + document_table.c.user_id, + document_table.c.name, + document_table.c.title, + document_table.c.content, + document_table.c.timestamp, + ) + ) + + # Insert data into knowledge table from document table + for doc in documents: + op.get_bind().execute( + knowledge_table.insert().values( + id=doc.collection_name, + user_id=doc.user_id, + 
description=doc.name, + meta={ + "legacy": True, + "document": True, + "tags": json.loads(doc.content or "{}").get("tags", []), + }, + name=doc.title, + created_at=doc.timestamp, + updated_at=doc.timestamp, # using created_at for both created_at and updated_at in project + ) + ) + + +def downgrade(): + op.drop_table("knowledge") diff --git a/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py new file mode 100644 index 000000000..5f7f2abf7 --- /dev/null +++ b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py @@ -0,0 +1,32 @@ +"""Update file table + +Revision ID: c0fbf31ca0db +Revises: ca81bd47c050 +Create Date: 2024-09-20 15:26:35.241684 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "c0fbf31ca0db" +down_revision: Union[str, None] = "ca81bd47c050" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("file", sa.Column("hash", sa.Text(), nullable=True)) + op.add_column("file", sa.Column("data", sa.JSON(), nullable=True)) + op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True)) + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("file", "updated_at") + op.drop_column("file", "data") + op.drop_column("file", "hash") diff --git a/src/lib/apis/files/index.ts b/src/lib/apis/files/index.ts index 630a9e7c5..b76143471 100644 --- a/src/lib/apis/files/index.ts +++ b/src/lib/apis/files/index.ts @@ -92,6 +92,40 @@ export const getFileById = async (token: string, id: string) => { return res; }; +export const updateFileDataContentById = async (token: string, id: string, content: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/files/${id}/data/content/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + content: content + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const getFileContentById = async (id: string) => { let error = null; diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts new file mode 100644 index 000000000..9923624a6 --- /dev/null +++ b/src/lib/apis/knowledge/index.ts @@ -0,0 +1,276 @@ +import { WEBUI_API_BASE_URL } from '$lib/constants'; + +export const createNewKnowledge = async (token: string, name: string, description: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/create`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + name: name, + description: description + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + 
return res; +}; + +export const getKnowledgeItems = async (token: string = '') => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/`, { + method: 'GET', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getKnowledgeById = async (token: string, id: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}`, { + method: 'GET', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +type KnowledgeUpdateForm = { + name?: string; + description?: string; + data?: object; +}; + +export const updateKnowledgeById = async (token: string, id: string, form: KnowledgeUpdateForm) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + name: form?.name ? form.name : undefined, + description: form?.description ? form.description : undefined, + data: form?.data ? 
form.data : undefined + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const addFileToKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/add`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + 
file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const deleteKnowledgeById = async (token: string, id: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/delete`, { + method: 'DELETE', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index cf86e951c..2c4c24cb6 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -306,7 +306,11 @@ export interface SearchDocument { filenames: string[]; } -export const processFile = async (token: string, file_id: string) => { +export const processFile = async ( + token: string, + file_id: string, + collection_name: string | null = null +) => { let error = null; const res = await fetch(`${RAG_API_BASE_URL}/process/file`, { @@ -317,7 +321,8 @@ export const processFile = async (token: string, file_id: string) => { authorization: `Bearer ${token}` }, body: JSON.stringify({ - file_id: file_id + file_id: file_id, + collection_name: collection_name ? collection_name : undefined }) }) .then(async (res) => { diff --git a/src/lib/components/AddFilesPlaceholder.svelte b/src/lib/components/AddFilesPlaceholder.svelte index 3bdbe9281..a3057c560 100644 --- a/src/lib/components/AddFilesPlaceholder.svelte +++ b/src/lib/components/AddFilesPlaceholder.svelte @@ -1,10 +1,18 @@
📄
-
{$i18n.t('Add Files')}
+
+ {#if title} + {title} + {:else} + {$i18n.t('Add Files')} + {/if} +
diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index c10b60aa0..d4ff6bd66 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -1,10 +1,10 @@ {#if filteredItems.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} @@ -124,39 +128,50 @@ class="max-h-60 flex flex-col w-full rounded-r-xl bg-white dark:bg-gray-900 dark:text-gray-100" >
- {#each filteredItems as doc, docIdx} + {#each filteredItems as item, idx} {/each} diff --git a/src/lib/components/chat/Messages/UserMessage.svelte b/src/lib/components/chat/Messages/UserMessage.svelte index 8a087b2bc..2e4313464 100644 --- a/src/lib/components/chat/Messages/UserMessage.svelte +++ b/src/lib/components/chat/Messages/UserMessage.svelte @@ -127,7 +127,7 @@ input {:else} + export let type = 'info'; + export let content = ''; + + const classNames: Record = { + info: 'bg-blue-500/20 text-blue-700 dark:text-blue-200 ', + success: 'bg-green-500/20 text-green-700 dark:text-green-200', + warning: 'bg-yellow-500/20 text-yellow-700 dark:text-yellow-200', + error: 'bg-red-500/20 text-red-700 dark:text-red-200' + }; + + +
+ {content} +
diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 316587b18..66ac75bb4 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -3,18 +3,19 @@ import { formatFileSize } from '$lib/utils'; import FileItemModal from './FileItemModal.svelte'; + import GarbageBin from '../icons/GarbageBin.svelte'; const i18n = getContext('i18n'); const dispatch = createEventDispatcher(); - export let className = 'w-72'; - export let colorClassName = 'bg-white dark:bg-gray-800'; + export let className = 'w-60'; + export let colorClassName = 'bg-white dark:bg-gray-850 border border-gray-50 dark:border-white/5'; export let url: string | null = null; export let dismissible = false; export let status = 'processed'; - export let file = null; + export let item = null; export let edit = false; export let name: string; @@ -24,115 +25,113 @@ let showModal = false; -{#if file} - +{#if item} + {/if} -
- +
{#if dismissible}
+ +
{/if} -
+ diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index f97e4f33d..6408ad05d 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -10,7 +10,7 @@ import Switch from './Switch.svelte'; import Tooltip from './Tooltip.svelte'; - export let file; + export let item; export let show = false; export let edit = false; @@ -18,9 +18,9 @@ let enableFullContent = false; onMount(() => { - console.log(file); + console.log(item); - if (file?.context === 'full') { + if (item?.context === 'full') { enableFullContent = true; } }); @@ -33,11 +33,11 @@
@@ -56,14 +56,14 @@
- {#if file.size} -
{formatFileSize(file.size)}
+ {#if item.size} +
{formatFileSize(item.size)}
• {/if} - {#if file?.file?.content} + {#if item?.file?.data?.content}
- {getLineCount(file?.file?.content ?? '')} extracted lines + {getLineCount(item?.file?.data?.content ?? '')} extracted lines
@@ -90,7 +90,7 @@ { - file.context = e.detail ? 'full' : undefined; + item.context = e.detail ? 'full' : undefined; }} />
@@ -102,7 +102,7 @@
- {file?.file?.content ?? 'No content'} + {item?.file?.data?.content ?? 'No content'}
diff --git a/src/lib/components/icons/BarsArrowUp.svelte b/src/lib/components/icons/BarsArrowUp.svelte new file mode 100644 index 000000000..d34dbde67 --- /dev/null +++ b/src/lib/components/icons/BarsArrowUp.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/BookOpen.svelte b/src/lib/components/icons/BookOpen.svelte new file mode 100644 index 000000000..5a77433d5 --- /dev/null +++ b/src/lib/components/icons/BookOpen.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/FloppyDisk.svelte b/src/lib/components/icons/FloppyDisk.svelte new file mode 100644 index 000000000..bcb481e82 --- /dev/null +++ b/src/lib/components/icons/FloppyDisk.svelte @@ -0,0 +1,20 @@ + + + diff --git a/src/lib/components/workspace/Knowledge.svelte b/src/lib/components/workspace/Knowledge.svelte new file mode 100644 index 000000000..1706ba0aa --- /dev/null +++ b/src/lib/components/workspace/Knowledge.svelte @@ -0,0 +1,195 @@ + + + + + {$i18n.t('Knowledge')} | {$WEBUI_NAME} + + + + { + deleteHandler(selectedItem); + }} +/> + +
+
+
+ {$i18n.t('Knowledge')} +
+ {$knowledge.length} +
+
+
+ +
+
+
+ + + +
+ +
+ +
+ +
+
+ +
+ +
+ {#each filteredItems as item} + + {/each} +
+ +
+ ⓘ {$i18n.t("Use '#' in the prompt input to load and include your knowledge.")} +
diff --git a/src/lib/components/workspace/Knowledge/Collection.svelte b/src/lib/components/workspace/Knowledge/Collection.svelte new file mode 100644 index 000000000..441e0b392 --- /dev/null +++ b/src/lib/components/workspace/Knowledge/Collection.svelte @@ -0,0 +1,479 @@ + + +{#if dragged} +
+
+
+
+ +
+ Drop any files here to add to my documents +
+
+
+
+
+
+{/if} + + { + const file = createFileFromText(e.detail.name, e.detail.content); + uploadFileHandler(file); + }} +/> + + { + if (inputFiles && inputFiles.length > 0) { + for (const file of inputFiles) { + uploadFileHandler(file); + } + + inputFiles = null; + const fileInputElement = document.getElementById('files-input'); + + if (fileInputElement) { + fileInputElement.value = ''; + } + } else { + toast.error($i18n.t(`File not found.`)); + } + }} +/> + +
+ + +