diff --git a/Dockerfile b/Dockerfile index 5102afd28..d7de72f01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,9 @@ ARG BUILD_HASH WORKDIR /app +# to store git revision in build +RUN apk add --no-cache git + COPY package.json package-lock.json ./ RUN npm ci diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 38bd709f1..25f0e6897 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -989,6 +989,26 @@ DEFAULT_USER_ROLE = PersistentConfig( os.getenv("DEFAULT_USER_ROLE", "pending"), ) +PENDING_USER_OVERLAY_TITLE = PersistentConfig( + "PENDING_USER_OVERLAY_TITLE", + "ui.pending_user_overlay_title", + os.environ.get("PENDING_USER_OVERLAY_TITLE", ""), +) + +PENDING_USER_OVERLAY_CONTENT = PersistentConfig( + "PENDING_USER_OVERLAY_CONTENT", + "ui.pending_user_overlay_content", + os.environ.get("PENDING_USER_OVERLAY_CONTENT", ""), +) + + +RESPONSE_WATERMARK = PersistentConfig( + "RESPONSE_WATERMARK", + "ui.watermark", + os.environ.get("RESPONSE_WATERMARK", ""), +) + + USER_PERMISSIONS_WORKSPACE_MODELS_ACCESS = ( os.environ.get("USER_PERMISSIONS_WORKSPACE_MODELS_ACCESS", "False").lower() == "true" @@ -1731,6 +1751,7 @@ QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", None) QDRANT_ON_DISK = os.environ.get("QDRANT_ON_DISK", "false").lower() == "true" QDRANT_PREFER_GRPC = os.environ.get("QDRANT_PREFER_GRPC", "False").lower() == "true" QDRANT_GRPC_PORT = int(os.environ.get("QDRANT_GRPC_PORT", "6334")) +ENABLE_QDRANT_MULTITENANCY_MODE = os.environ.get("ENABLE_QDRANT_MULTITENANCY_MODE", "false").lower() == "true" # OpenSearch OPENSEARCH_URI = os.environ.get("OPENSEARCH_URI", "https://localhost:9200") @@ -1825,6 +1846,18 @@ CONTENT_EXTRACTION_ENGINE = PersistentConfig( os.environ.get("CONTENT_EXTRACTION_ENGINE", "").lower(), ) +EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig( + "EXTERNAL_DOCUMENT_LOADER_URL", + "rag.external_document_loader_url", + os.environ.get("EXTERNAL_DOCUMENT_LOADER_URL", ""), +) + +EXTERNAL_DOCUMENT_LOADER_API_KEY = PersistentConfig( + "EXTERNAL_DOCUMENT_LOADER_API_KEY", + "rag.external_document_loader_api_key", + os.environ.get("EXTERNAL_DOCUMENT_LOADER_API_KEY", ""), +) + TIKA_SERVER_URL = PersistentConfig( "TIKA_SERVER_URL", "rag.tika_server_url", @@ -1849,6 +1882,12 @@ DOCLING_OCR_LANG = PersistentConfig( os.getenv("DOCLING_OCR_LANG", "eng,fra,deu,spa"), ) +DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig( + "DOCLING_DO_PICTURE_DESCRIPTION", + "rag.docling_do_picture_description", + os.getenv("DOCLING_DO_PICTURE_DESCRIPTION", "False").lower() == "true", +) + DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig( "DOCUMENT_INTELLIGENCE_ENDPOINT", "rag.document_intelligence_endpoint", @@ -1920,6 +1959,12 @@ RAG_FILE_MAX_SIZE = PersistentConfig( ), ) +RAG_ALLOWED_FILE_EXTENSIONS = PersistentConfig( + "RAG_ALLOWED_FILE_EXTENSIONS", + "rag.file.allowed_extensions", + os.environ.get("RAG_ALLOWED_FILE_EXTENSIONS", "").split(","), +) + RAG_EMBEDDING_ENGINE = PersistentConfig( "RAG_EMBEDDING_ENGINE", "rag.embedding_engine", @@ -2839,6 +2884,12 @@ LDAP_CA_CERT_FILE = PersistentConfig( os.environ.get("LDAP_CA_CERT_FILE", ""), ) +LDAP_VALIDATE_CERT = PersistentConfig( + "LDAP_VALIDATE_CERT", + "ldap.server.validate_cert", + os.environ.get("LDAP_VALIDATE_CERT", "True").lower() == "true", +) + LDAP_CIPHERS = PersistentConfig( "LDAP_CIPHERS", "ldap.server.ciphers", os.environ.get("LDAP_CIPHERS", "ALL") ) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index e5fdace6d..646db6846 100644 --- a/backend/open_webui/main.py 
+++ b/backend/open_webui/main.py @@ -197,6 +197,7 @@ from open_webui.config import ( RAG_EMBEDDING_ENGINE, RAG_EMBEDDING_BATCH_SIZE, RAG_RELEVANCE_THRESHOLD, + RAG_ALLOWED_FILE_EXTENSIONS, RAG_FILE_MAX_COUNT, RAG_FILE_MAX_SIZE, RAG_OPENAI_API_BASE_URL, @@ -206,10 +207,13 @@ from open_webui.config import ( CHUNK_OVERLAP, CHUNK_SIZE, CONTENT_EXTRACTION_ENGINE, + EXTERNAL_DOCUMENT_LOADER_URL, + EXTERNAL_DOCUMENT_LOADER_API_KEY, TIKA_SERVER_URL, DOCLING_SERVER_URL, DOCLING_OCR_ENGINE, DOCLING_OCR_LANG, + DOCLING_DO_PICTURE_DESCRIPTION, DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, MISTRAL_OCR_API_KEY, @@ -291,6 +295,8 @@ from open_webui.config import ( ENABLE_EVALUATION_ARENA_MODELS, USER_PERMISSIONS, DEFAULT_USER_ROLE, + PENDING_USER_OVERLAY_CONTENT, + PENDING_USER_OVERLAY_TITLE, DEFAULT_PROMPT_SUGGESTIONS, DEFAULT_MODELS, DEFAULT_ARENA_MODEL, @@ -317,6 +323,7 @@ from open_webui.config import ( LDAP_APP_PASSWORD, LDAP_USE_TLS, LDAP_CA_CERT_FILE, + LDAP_VALIDATE_CERT, LDAP_CIPHERS, # Misc ENV, @@ -327,6 +334,7 @@ from open_webui.config import ( DEFAULT_LOCALE, OAUTH_PROVIDERS, WEBUI_URL, + RESPONSE_WATERMARK, # Admin ENABLE_ADMIN_CHAT_ACCESS, ENABLE_ADMIN_EXPORT, @@ -373,6 +381,7 @@ from open_webui.env import ( OFFLINE_MODE, ENABLE_OTEL, EXTERNAL_PWA_MANIFEST_URL, + AIOHTTP_CLIENT_SESSION_SSL, ) @@ -573,6 +582,11 @@ app.state.config.DEFAULT_MODELS = DEFAULT_MODELS app.state.config.DEFAULT_PROMPT_SUGGESTIONS = DEFAULT_PROMPT_SUGGESTIONS app.state.config.DEFAULT_USER_ROLE = DEFAULT_USER_ROLE +app.state.config.PENDING_USER_OVERLAY_CONTENT = PENDING_USER_OVERLAY_CONTENT +app.state.config.PENDING_USER_OVERLAY_TITLE = PENDING_USER_OVERLAY_TITLE + +app.state.config.RESPONSE_WATERMARK = RESPONSE_WATERMARK + app.state.config.USER_PERMISSIONS = USER_PERMISSIONS app.state.config.WEBHOOK_URL = WEBHOOK_URL app.state.config.BANNERS = WEBUI_BANNERS @@ -609,6 +623,7 @@ app.state.config.LDAP_SEARCH_BASE = LDAP_SEARCH_BASE app.state.config.LDAP_SEARCH_FILTERS = LDAP_SEARCH_FILTERS app.state.config.LDAP_USE_TLS = LDAP_USE_TLS app.state.config.LDAP_CA_CERT_FILE = LDAP_CA_CERT_FILE +app.state.config.LDAP_VALIDATE_CERT = LDAP_VALIDATE_CERT app.state.config.LDAP_CIPHERS = LDAP_CIPHERS @@ -631,6 +646,7 @@ app.state.FUNCTIONS = {} app.state.config.TOP_K = RAG_TOP_K app.state.config.TOP_K_RERANKER = RAG_TOP_K_RERANKER app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD +app.state.config.ALLOWED_FILE_EXTENSIONS = RAG_ALLOWED_FILE_EXTENSIONS app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT @@ -641,10 +657,13 @@ app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERIFICATION app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE +app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL +app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG +app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY @@ -1170,8 +1189,8 @@ async def 
chat_completion( "tool_ids": form_data.get("tool_ids", None), "tool_servers": form_data.pop("tool_servers", None), "files": form_data.get("files", None), - "features": form_data.get("features", None), - "variables": form_data.get("variables", None), + "features": form_data.get("features", {}), + "variables": form_data.get("variables", {}), "model": model, "direct": model_item.get("direct", False), **( @@ -1395,6 +1414,11 @@ async def get_app_config(request: Request): "sharepoint_url": ONEDRIVE_SHAREPOINT_URL.value, "sharepoint_tenant_id": ONEDRIVE_SHAREPOINT_TENANT_ID.value, }, + "ui": { + "pending_user_overlay_title": app.state.config.PENDING_USER_OVERLAY_TITLE, + "pending_user_overlay_content": app.state.config.PENDING_USER_OVERLAY_CONTENT, + "response_watermark": app.state.config.RESPONSE_WATERMARK, + }, "license_metadata": app.state.LICENSE_METADATA, **( { @@ -1446,7 +1470,8 @@ async def get_app_latest_release_version(user=Depends(get_verified_user)): timeout = aiohttp.ClientTimeout(total=1) async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get( - "https://api.github.com/repos/open-webui/open-webui/releases/latest" + "https://api.github.com/repos/open-webui/open-webui/releases/latest", + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as response: response.raise_for_status() data = await response.json() diff --git a/backend/open_webui/retrieval/loaders/external_document.py b/backend/open_webui/retrieval/loaders/external_document.py new file mode 100644 index 000000000..6119da379 --- /dev/null +++ b/backend/open_webui/retrieval/loaders/external_document.py @@ -0,0 +1,58 @@ +import requests +import logging +from typing import Iterator, List, Union + +from langchain_core.document_loaders import BaseLoader +from langchain_core.documents import Document +from open_webui.env import SRC_LOG_LEVELS + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +class ExternalDocumentLoader(BaseLoader): + def __init__( + self, + file_path, + url: str, + api_key: str, + mime_type=None, + **kwargs, + ) -> None: + self.url = url + self.api_key = api_key + + self.file_path = file_path + self.mime_type = mime_type + + def load(self) -> list[Document]: + with open(self.file_path, "rb") as f: + data = f.read() + + headers = {} + if self.mime_type is not None: + headers["Content-Type"] = self.mime_type + + if self.api_key is not None: + headers["Authorization"] = f"Bearer {self.api_key}" + + url = self.url + if url.endswith("/"): + url = url[:-1] + + r = requests.put(f"{url}/process", data=data, headers=headers) + + if r.ok: + res = r.json() + + if res: + return [ + Document( + page_content=res.get("page_content"), + metadata=res.get("metadata"), + ) + ] + else: + raise Exception("Error loading document: No content returned") + else: + raise Exception(f"Error loading document: {r.status_code} {r.text}") diff --git a/backend/open_webui/retrieval/loaders/external.py b/backend/open_webui/retrieval/loaders/external_web.py similarity index 95% rename from backend/open_webui/retrieval/loaders/external.py rename to backend/open_webui/retrieval/loaders/external_web.py index 642cfd3a5..68ed66162 100644 --- a/backend/open_webui/retrieval/loaders/external.py +++ b/backend/open_webui/retrieval/loaders/external_web.py @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) -class ExternalLoader(BaseLoader): +class ExternalWebLoader(BaseLoader): def __init__( self, web_paths: Union[str, List[str]], @@ -32,7 +32,7 @@ class 
ExternalLoader(BaseLoader): response = requests.post( self.external_url, headers={ - "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot", + "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) External Web Loader", "Authorization": f"Bearer {self.external_api_key}", }, json={ diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index 8e7b5a3da..c5f0b4e5e 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -21,6 +21,8 @@ from langchain_community.document_loaders import ( ) from langchain_core.documents import Document + +from open_webui.retrieval.loaders.external_document import ExternalDocumentLoader from open_webui.retrieval.loaders.mistral import MistralLoader from open_webui.env import SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL @@ -126,14 +128,12 @@ class TikaLoader: class DoclingLoader: - def __init__( - self, url, file_path=None, mime_type=None, ocr_engine=None, ocr_lang=None - ): + def __init__(self, url, file_path=None, mime_type=None, params=None): self.url = url.rstrip("/") self.file_path = file_path self.mime_type = mime_type - self.ocr_engine = ocr_engine - self.ocr_lang = ocr_lang + + self.params = params or {} def load(self) -> list[Document]: with open(self.file_path, "rb") as f: @@ -150,11 +150,19 @@ class DoclingLoader: "table_mode": "accurate", } - if self.ocr_engine and self.ocr_lang: - params["ocr_engine"] = self.ocr_engine - params["ocr_lang"] = [ - lang.strip() for lang in self.ocr_lang.split(",") if lang.strip() - ] + if self.params: + if self.params.get("do_picture_classification"): + params["do_picture_classification"] = self.params.get( + "do_picture_classification" + ) + + if self.params.get("ocr_engine") and self.params.get("ocr_lang"): + params["ocr_engine"] = self.params.get("ocr_engine") + params["ocr_lang"] = [ + lang.strip() + for lang in self.params.get("ocr_lang").split(",") + if lang.strip() + ] endpoint = f"{self.url}/v1alpha/convert/file" r = requests.post(endpoint, files=files, data=params) @@ -207,6 +215,17 @@ class Loader: def _get_loader(self, filename: str, file_content_type: str, file_path: str): file_ext = filename.split(".")[-1].lower() + if ( + self.engine == "external" + and self.kwargs.get("EXTERNAL_DOCUMENT_LOADER_URL") + and self.kwargs.get("EXTERNAL_DOCUMENT_LOADER_API_KEY") + ): + loader = ExternalDocumentLoader( + file_path=file_path, + url=self.kwargs.get("EXTERNAL_DOCUMENT_LOADER_URL"), + api_key=self.kwargs.get("EXTERNAL_DOCUMENT_LOADER_API_KEY"), + mime_type=file_content_type, + ) if self.engine == "tika" and self.kwargs.get("TIKA_SERVER_URL"): if self._is_text_file(file_ext, file_content_type): loader = TextLoader(file_path, autodetect_encoding=True) @@ -225,8 +244,13 @@ class Loader: url=self.kwargs.get("DOCLING_SERVER_URL"), file_path=file_path, mime_type=file_content_type, - ocr_engine=self.kwargs.get("DOCLING_OCR_ENGINE"), - ocr_lang=self.kwargs.get("DOCLING_OCR_LANG"), + params={ + "ocr_engine": self.kwargs.get("DOCLING_OCR_ENGINE"), + "ocr_lang": self.kwargs.get("DOCLING_OCR_LANG"), + "do_picture_classification": self.kwargs.get( + "DOCLING_DO_PICTURE_DESCRIPTION" + ), + }, ) elif ( self.engine == "document_intelligence" @@ -258,6 +282,15 @@ class Loader: loader = MistralLoader( api_key=self.kwargs.get("MISTRAL_OCR_API_KEY"), file_path=file_path ) + elif ( + self.engine == "external" + and self.kwargs.get("MISTRAL_OCR_API_KEY") != "" + and file_ext + in ["pdf"] # Mistral OCR currently 
only supports PDF and images + ): + loader = MistralLoader( + api_key=self.kwargs.get("MISTRAL_OCR_API_KEY"), file_path=file_path + ) else: if file_ext == "pdf": loader = PyPDFLoader( diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 2df6a0ab5..a132d7201 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -12,7 +12,7 @@ from langchain_community.retrievers import BM25Retriever from langchain_core.documents import Document from open_webui.config import VECTOR_DB -from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT +from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT from open_webui.models.users import UserModel from open_webui.models.files import Files diff --git a/backend/open_webui/retrieval/vector/connector.py b/backend/open_webui/retrieval/vector/connector.py deleted file mode 100644 index 198e6f176..000000000 --- a/backend/open_webui/retrieval/vector/connector.py +++ /dev/null @@ -1,30 +0,0 @@ -from open_webui.config import VECTOR_DB - -if VECTOR_DB == "milvus": - from open_webui.retrieval.vector.dbs.milvus import MilvusClient - - VECTOR_DB_CLIENT = MilvusClient() -elif VECTOR_DB == "qdrant": - from open_webui.retrieval.vector.dbs.qdrant import QdrantClient - - VECTOR_DB_CLIENT = QdrantClient() -elif VECTOR_DB == "opensearch": - from open_webui.retrieval.vector.dbs.opensearch import OpenSearchClient - - VECTOR_DB_CLIENT = OpenSearchClient() -elif VECTOR_DB == "pgvector": - from open_webui.retrieval.vector.dbs.pgvector import PgvectorClient - - VECTOR_DB_CLIENT = PgvectorClient() -elif VECTOR_DB == "elasticsearch": - from open_webui.retrieval.vector.dbs.elasticsearch import ElasticsearchClient - - VECTOR_DB_CLIENT = ElasticsearchClient() -elif VECTOR_DB == "pinecone": - from open_webui.retrieval.vector.dbs.pinecone import PineconeClient - - VECTOR_DB_CLIENT = PineconeClient() -else: - from open_webui.retrieval.vector.dbs.chroma import ChromaClient - - VECTOR_DB_CLIENT = ChromaClient() diff --git a/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py b/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py new file mode 100644 index 000000000..e83c437ef --- /dev/null +++ b/backend/open_webui/retrieval/vector/dbs/qdrant_multitenancy.py @@ -0,0 +1,712 @@ +import logging +from typing import Optional, Tuple +from urllib.parse import urlparse + +import grpc +from open_webui.config import ( + QDRANT_API_KEY, + QDRANT_GRPC_PORT, + QDRANT_ON_DISK, + QDRANT_PREFER_GRPC, + QDRANT_URI, +) +from open_webui.env import SRC_LOG_LEVELS +from open_webui.retrieval.vector.main import ( + GetResult, + SearchResult, + VectorDBBase, + VectorItem, +) +from qdrant_client import QdrantClient as Qclient +from qdrant_client.http.exceptions import UnexpectedResponse +from qdrant_client.http.models import PointStruct +from qdrant_client.models import models + +NO_LIMIT = 999999999 + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +class QdrantClient(VectorDBBase): + def __init__(self): + self.collection_prefix = "open-webui" + self.QDRANT_URI = QDRANT_URI + self.QDRANT_API_KEY = QDRANT_API_KEY + self.QDRANT_ON_DISK = QDRANT_ON_DISK + self.PREFER_GRPC = QDRANT_PREFER_GRPC + self.GRPC_PORT = QDRANT_GRPC_PORT + + if not self.QDRANT_URI: + self.client = None + return + + # Unified handling for either scheme + parsed = urlparse(self.QDRANT_URI) + host = parsed.hostname or self.QDRANT_URI + http_port = parsed.port or 6333 # default REST port + + if 
self.PREFER_GRPC: + self.client = Qclient( + host=host, + port=http_port, + grpc_port=self.GRPC_PORT, + prefer_grpc=self.PREFER_GRPC, + api_key=self.QDRANT_API_KEY, + ) + else: + self.client = Qclient(url=self.QDRANT_URI, api_key=self.QDRANT_API_KEY) + + # Main collection types for multi-tenancy + self.MEMORY_COLLECTION = f"{self.collection_prefix}_memories" + self.KNOWLEDGE_COLLECTION = f"{self.collection_prefix}_knowledge" + self.FILE_COLLECTION = f"{self.collection_prefix}_files" + self.WEB_SEARCH_COLLECTION = f"{self.collection_prefix}_web-search" + self.HASH_BASED_COLLECTION = f"{self.collection_prefix}_hash-based" + + def _result_to_get_result(self, points) -> GetResult: + ids = [] + documents = [] + metadatas = [] + + for point in points: + payload = point.payload + ids.append(point.id) + documents.append(payload["text"]) + metadatas.append(payload["metadata"]) + + return GetResult( + **{ + "ids": [ids], + "documents": [documents], + "metadatas": [metadatas], + } + ) + + def _get_collection_and_tenant_id(self, collection_name: str) -> Tuple[str, str]: + """ + Maps the traditional collection name to multi-tenant collection and tenant ID. + + Returns: + tuple: (collection_name, tenant_id) + """ + # Check for user memory collections + tenant_id = collection_name + + if collection_name.startswith("user-memory-"): + return self.MEMORY_COLLECTION, tenant_id + + # Check for file collections + elif collection_name.startswith("file-"): + return self.FILE_COLLECTION, tenant_id + + # Check for web search collections + elif collection_name.startswith("web-search-"): + return self.WEB_SEARCH_COLLECTION, tenant_id + + # Handle hash-based collections (YouTube and web URLs) + elif len(collection_name) == 63 and all( + c in "0123456789abcdef" for c in collection_name + ): + return self.HASH_BASED_COLLECTION, tenant_id + + else: + return self.KNOWLEDGE_COLLECTION, tenant_id + + def _extract_error_message(self, exception): + """ + Extract error message from either HTTP or gRPC exceptions + + Returns: + tuple: (status_code, error_message) + """ + # Check if it's an HTTP exception + if isinstance(exception, UnexpectedResponse): + try: + error_data = exception.structured() + error_msg = error_data.get("status", {}).get("error", "") + return exception.status_code, error_msg + except Exception as inner_e: + log.error(f"Failed to parse HTTP error: {inner_e}") + return exception.status_code, str(exception) + + # Check if it's a gRPC exception + elif isinstance(exception, grpc.RpcError): + # Extract status code from gRPC error + status_code = None + if hasattr(exception, "code") and callable(exception.code): + status_code = exception.code().value[0] + + # Extract error message + error_msg = str(exception) + if "details =" in error_msg: + # Parse the details line which contains the actual error message + try: + details_line = [ + line.strip() + for line in error_msg.split("\n") + if "details =" in line + ][0] + error_msg = details_line.split("details =")[1].strip(' "') + except (IndexError, AttributeError): + # Fall back to full message if parsing fails + pass + + return status_code, error_msg + + # For any other type of exception + return None, str(exception) + + def _is_collection_not_found_error(self, exception): + """ + Check if the exception is due to collection not found, supporting both HTTP and gRPC + """ + status_code, error_msg = self._extract_error_message(exception) + + # HTTP error (404) + if ( + status_code == 404 + and "Collection" in error_msg + and "doesn't exist" in error_msg + ): + return 
True + + # gRPC error (NOT_FOUND status) + if ( + isinstance(exception, grpc.RpcError) + and exception.code() == grpc.StatusCode.NOT_FOUND + ): + return True + + return False + + def _is_dimension_mismatch_error(self, exception): + """ + Check if the exception is due to dimension mismatch, supporting both HTTP and gRPC + """ + status_code, error_msg = self._extract_error_message(exception) + + # Common patterns in both HTTP and gRPC + return ( + "Vector dimension error" in error_msg + or "dimensions mismatch" in error_msg + or "invalid vector size" in error_msg + ) + + def _create_multi_tenant_collection_if_not_exists( + self, mt_collection_name: str, dimension: int = 384 + ): + """ + Creates a collection with multi-tenancy configuration if it doesn't exist. + Default dimension is set to 384 which corresponds to 'sentence-transformers/all-MiniLM-L6-v2'. + When creating collections dynamically (insert/upsert), the actual vector dimensions will be used. + """ + try: + # Try to create the collection directly - will fail if it already exists + self.client.create_collection( + collection_name=mt_collection_name, + vectors_config=models.VectorParams( + size=dimension, + distance=models.Distance.COSINE, + on_disk=self.QDRANT_ON_DISK, + ), + hnsw_config=models.HnswConfigDiff( + payload_m=16, # Enable per-tenant indexing + m=0, + on_disk=self.QDRANT_ON_DISK, + ), + ) + + # Create tenant ID payload index + self.client.create_payload_index( + collection_name=mt_collection_name, + field_name="tenant_id", + field_schema=models.KeywordIndexParams( + type=models.KeywordIndexType.KEYWORD, + is_tenant=True, + on_disk=self.QDRANT_ON_DISK, + ), + wait=True, + ) + + log.info( + f"Multi-tenant collection {mt_collection_name} created with dimension {dimension}!" + ) + except (UnexpectedResponse, grpc.RpcError) as e: + # Check for the specific error indicating collection already exists + status_code, error_msg = self._extract_error_message(e) + + # HTTP status code 409 or gRPC ALREADY_EXISTS + if (isinstance(e, UnexpectedResponse) and status_code == 409) or ( + isinstance(e, grpc.RpcError) + and e.code() == grpc.StatusCode.ALREADY_EXISTS + ): + if "already exists" in error_msg: + log.debug(f"Collection {mt_collection_name} already exists") + return + # If it's not an already exists error, re-raise + raise e + except Exception as e: + raise e + + def _create_points(self, items: list[VectorItem], tenant_id: str): + """ + Create point structs from vector items with tenant ID. + """ + return [ + PointStruct( + id=item["id"], + vector=item["vector"], + payload={ + "text": item["text"], + "metadata": item["metadata"], + "tenant_id": tenant_id, + }, + ) + for item in items + ] + + def has_collection(self, collection_name: str) -> bool: + """ + Check if a logical collection exists by checking for any points with the tenant ID. 
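+        A collection counts as present when at least one point tagged with its tenant ID exists in the mapped multi-tenant collection.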
+ """ + if not self.client: + return False + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Create tenant filter + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + try: + # Try directly querying - most of the time collection should exist + response = self.client.query_points( + collection_name=mt_collection, + query_filter=models.Filter(must=[tenant_filter]), + limit=1, + ) + + # Collection exists with this tenant ID if there are points + return len(response.points) > 0 + except (UnexpectedResponse, grpc.RpcError) as e: + if self._is_collection_not_found_error(e): + log.debug(f"Collection {mt_collection} doesn't exist") + return False + else: + # For other API errors, log and return False + _, error_msg = self._extract_error_message(e) + log.warning(f"Unexpected Qdrant error: {error_msg}") + return False + except Exception as e: + # For any other errors, log and return False + log.debug(f"Error checking collection {mt_collection}: {e}") + return False + + def delete( + self, + collection_name: str, + ids: Optional[list[str]] = None, + filter: Optional[dict] = None, + ): + """ + Delete vectors by ID or filter from a collection with tenant isolation. + """ + if not self.client: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Create tenant filter + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + must_conditions = [tenant_filter] + should_conditions = [] + + if ids: + for id_value in ids: + should_conditions.append( + models.FieldCondition( + key="metadata.id", + match=models.MatchValue(value=id_value), + ), + ) + elif filter: + for key, value in filter.items(): + must_conditions.append( + models.FieldCondition( + key=f"metadata.{key}", + match=models.MatchValue(value=value), + ), + ) + + try: + # Try to delete directly - most of the time collection should exist + update_result = self.client.delete( + collection_name=mt_collection, + points_selector=models.FilterSelector( + filter=models.Filter(must=must_conditions, should=should_conditions) + ), + ) + + return update_result + except (UnexpectedResponse, grpc.RpcError) as e: + if self._is_collection_not_found_error(e): + log.debug( + f"Collection {mt_collection} doesn't exist, nothing to delete" + ) + return None + else: + # For other API errors, log and re-raise + _, error_msg = self._extract_error_message(e) + log.warning(f"Unexpected Qdrant error: {error_msg}") + raise + except Exception as e: + # For non-Qdrant exceptions, re-raise + raise + + def search( + self, collection_name: str, vectors: list[list[float | int]], limit: int + ) -> Optional[SearchResult]: + """ + Search for the nearest neighbor items based on the vectors with tenant isolation. 
+ """ + if not self.client: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Get the vector dimension from the query vector + dimension = len(vectors[0]) if vectors and len(vectors) > 0 else None + + try: + # Try the search operation directly - most of the time collection should exist + + # Create tenant filter + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + # Ensure vector dimensions match the collection + collection_dim = self.client.get_collection( + mt_collection + ).config.params.vectors.size + + if collection_dim != dimension: + if collection_dim < dimension: + vectors = [vector[:collection_dim] for vector in vectors] + else: + vectors = [ + vector + [0] * (collection_dim - dimension) + for vector in vectors + ] + + # Search with tenant filter + prefetch_query = models.Prefetch( + filter=models.Filter(must=[tenant_filter]), + limit=NO_LIMIT, + ) + query_response = self.client.query_points( + collection_name=mt_collection, + query=vectors[0], + prefetch=prefetch_query, + limit=limit, + ) + + get_result = self._result_to_get_result(query_response.points) + return SearchResult( + ids=get_result.ids, + documents=get_result.documents, + metadatas=get_result.metadatas, + # qdrant distance is [-1, 1], normalize to [0, 1] + distances=[ + [(point.score + 1.0) / 2.0 for point in query_response.points] + ], + ) + except (UnexpectedResponse, grpc.RpcError) as e: + if self._is_collection_not_found_error(e): + log.debug( + f"Collection {mt_collection} doesn't exist, search returns None" + ) + return None + else: + # For other API errors, log and re-raise + _, error_msg = self._extract_error_message(e) + log.warning(f"Unexpected Qdrant error during search: {error_msg}") + raise + except Exception as e: + # For non-Qdrant exceptions, log and return None + log.exception(f"Error searching collection '{collection_name}': {e}") + return None + + def query(self, collection_name: str, filter: dict, limit: Optional[int] = None): + """ + Query points with filters and tenant isolation. 
+ """ + if not self.client: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Set default limit if not provided + if limit is None: + limit = NO_LIMIT + + # Create tenant filter + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + # Create metadata filters + field_conditions = [] + for key, value in filter.items(): + field_conditions.append( + models.FieldCondition( + key=f"metadata.{key}", match=models.MatchValue(value=value) + ) + ) + + # Combine tenant filter with metadata filters + combined_filter = models.Filter(must=[tenant_filter, *field_conditions]) + + try: + # Try the query directly - most of the time collection should exist + points = self.client.query_points( + collection_name=mt_collection, + query_filter=combined_filter, + limit=limit, + ) + + return self._result_to_get_result(points.points) + except (UnexpectedResponse, grpc.RpcError) as e: + if self._is_collection_not_found_error(e): + log.debug( + f"Collection {mt_collection} doesn't exist, query returns None" + ) + return None + else: + # For other API errors, log and re-raise + _, error_msg = self._extract_error_message(e) + log.warning(f"Unexpected Qdrant error during query: {error_msg}") + raise + except Exception as e: + # For non-Qdrant exceptions, log and re-raise + log.exception(f"Error querying collection '{collection_name}': {e}") + return None + + def get(self, collection_name: str) -> Optional[GetResult]: + """ + Get all items in a collection with tenant isolation. + """ + if not self.client: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Create tenant filter + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + try: + # Try to get points directly - most of the time collection should exist + points = self.client.query_points( + collection_name=mt_collection, + query_filter=models.Filter(must=[tenant_filter]), + limit=NO_LIMIT, + ) + + return self._result_to_get_result(points.points) + except (UnexpectedResponse, grpc.RpcError) as e: + if self._is_collection_not_found_error(e): + log.debug(f"Collection {mt_collection} doesn't exist, get returns None") + return None + else: + # For other API errors, log and re-raise + _, error_msg = self._extract_error_message(e) + log.warning(f"Unexpected Qdrant error during get: {error_msg}") + raise + except Exception as e: + # For non-Qdrant exceptions, log and return None + log.exception(f"Error getting collection '{collection_name}': {e}") + return None + + def _handle_operation_with_error_retry( + self, operation_name, mt_collection, points, dimension + ): + """ + Private helper to handle common error cases for insert and upsert operations. + + Args: + operation_name: 'insert' or 'upsert' + mt_collection: The multi-tenant collection name + points: The vector points to insert/upsert + dimension: The dimension of the vectors + + Returns: + The operation result (for upsert) or None (for insert) + """ + try: + if operation_name == "insert": + self.client.upload_points(mt_collection, points) + return None + else: # upsert + return self.client.upsert(mt_collection, points) + except (UnexpectedResponse, grpc.RpcError) as e: + # Handle collection not found + if self._is_collection_not_found_error(e): + log.info( + f"Collection {mt_collection} doesn't exist. 
Creating it with dimension {dimension}." + ) + # Create collection with correct dimensions from our vectors + self._create_multi_tenant_collection_if_not_exists( + mt_collection_name=mt_collection, dimension=dimension + ) + # Try operation again - no need for dimension adjustment since we just created with correct dimensions + if operation_name == "insert": + self.client.upload_points(mt_collection, points) + return None + else: # upsert + return self.client.upsert(mt_collection, points) + + # Handle dimension mismatch + elif self._is_dimension_mismatch_error(e): + # For dimension errors, the collection must exist, so get its configuration + mt_collection_info = self.client.get_collection(mt_collection) + existing_size = mt_collection_info.config.params.vectors.size + + log.info( + f"Dimension mismatch: Collection {mt_collection} expects {existing_size}, got {dimension}" + ) + + if existing_size < dimension: + # Truncate vectors to fit + log.info( + f"Truncating vectors from {dimension} to {existing_size} dimensions" + ) + points = [ + PointStruct( + id=point.id, + vector=point.vector[:existing_size], + payload=point.payload, + ) + for point in points + ] + elif existing_size > dimension: + # Pad vectors with zeros + log.info( + f"Padding vectors from {dimension} to {existing_size} dimensions with zeros" + ) + points = [ + PointStruct( + id=point.id, + vector=point.vector + + [0] * (existing_size - len(point.vector)), + payload=point.payload, + ) + for point in points + ] + # Try operation again with adjusted dimensions + if operation_name == "insert": + self.client.upload_points(mt_collection, points) + return None + else: # upsert + return self.client.upsert(mt_collection, points) + else: + # Not a known error we can handle, log and re-raise + _, error_msg = self._extract_error_message(e) + log.warning(f"Unhandled Qdrant error: {error_msg}") + raise + except Exception as e: + # For non-Qdrant exceptions, re-raise + raise + + def insert(self, collection_name: str, items: list[VectorItem]): + """ + Insert items with tenant ID. + """ + if not self.client or not items: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Get dimensions from the actual vectors + dimension = len(items[0]["vector"]) if items else None + + # Create points with tenant ID + points = self._create_points(items, tenant_id) + + # Handle the operation with error retry + return self._handle_operation_with_error_retry( + "insert", mt_collection, points, dimension + ) + + def upsert(self, collection_name: str, items: list[VectorItem]): + """ + Upsert items with tenant ID. + """ + if not self.client or not items: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + # Get dimensions from the actual vectors + dimension = len(items[0]["vector"]) if items else None + + # Create points with tenant ID + points = self._create_points(items, tenant_id) + + # Handle the operation with error retry + return self._handle_operation_with_error_retry( + "upsert", mt_collection, points, dimension + ) + + def reset(self): + """ + Reset the database by deleting all collections. 
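+        Only collections whose names start with the "open-webui" prefix are dropped.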
+ """ + if not self.client: + return None + + collection_names = self.client.get_collections().collections + for collection_name in collection_names: + if collection_name.name.startswith(self.collection_prefix): + self.client.delete_collection(collection_name=collection_name.name) + + def delete_collection(self, collection_name: str): + """ + Delete a collection. + """ + if not self.client: + return None + + # Map to multi-tenant collection and tenant ID + mt_collection, tenant_id = self._get_collection_and_tenant_id(collection_name) + + tenant_filter = models.FieldCondition( + key="tenant_id", match=models.MatchValue(value=tenant_id) + ) + + field_conditions = [tenant_filter] + + update_result = self.client.delete( + collection_name=mt_collection, + points_selector=models.FilterSelector( + filter=models.Filter(must=field_conditions) + ), + ) + + if self.client.get_collection(mt_collection).points_count == 0: + self.client.delete_collection(mt_collection) + + return update_result diff --git a/backend/open_webui/retrieval/vector/factory.py b/backend/open_webui/retrieval/vector/factory.py new file mode 100644 index 000000000..84126724a --- /dev/null +++ b/backend/open_webui/retrieval/vector/factory.py @@ -0,0 +1,53 @@ +from open_webui.retrieval.vector.main import VectorDBBase +from open_webui.retrieval.vector.type import VectorType +from open_webui.config import VECTOR_DB, ENABLE_QDRANT_MULTITENANCY_MODE + + +class Vector: + + @staticmethod + def get_vector(vector_type: str) -> VectorDBBase: + """ + get vector db instance by vector type + """ + match vector_type: + case VectorType.MILVUS: + from open_webui.retrieval.vector.dbs.milvus import MilvusClient + + return MilvusClient() + case VectorType.QDRANT: + if ENABLE_QDRANT_MULTITENANCY_MODE: + from open_webui.retrieval.vector.dbs.qdrant_multitenancy import QdrantClient + + return QdrantClient() + else: + from open_webui.retrieval.vector.dbs.qdrant import QdrantClient + + return QdrantClient() + case VectorType.PINECONE: + from open_webui.retrieval.vector.dbs.pinecone import PineconeClient + + return PineconeClient() + case VectorType.OPENSEARCH: + from open_webui.retrieval.vector.dbs.opensearch import OpenSearchClient + + return OpenSearchClient() + case VectorType.PGVECTOR: + from open_webui.retrieval.vector.dbs.pgvector import PgvectorClient + + return PgvectorClient() + case VectorType.ELASTICSEARCH: + from open_webui.retrieval.vector.dbs.elasticsearch import ( + ElasticsearchClient, + ) + + return ElasticsearchClient() + case VectorType.CHROMA: + from open_webui.retrieval.vector.dbs.chroma import ChromaClient + + return ChromaClient() + case _: + raise ValueError(f"Unsupported vector type: {vector_type}") + + +VECTOR_DB_CLIENT = Vector.get_vector(VECTOR_DB) diff --git a/backend/open_webui/retrieval/vector/type.py b/backend/open_webui/retrieval/vector/type.py new file mode 100644 index 000000000..b03bcb482 --- /dev/null +++ b/backend/open_webui/retrieval/vector/type.py @@ -0,0 +1,11 @@ +from enum import StrEnum + + +class VectorType(StrEnum): + MILVUS = "milvus" + QDRANT = "qdrant" + CHROMA = "chroma" + PINECONE = "pinecone" + ELASTICSEARCH = "elasticsearch" + OPENSEARCH = "opensearch" + PGVECTOR = "pgvector" diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 78c962f15..b8ec538d3 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -25,7 +25,7 @@ from langchain_community.document_loaders.firecrawl import FireCrawlLoader from 
langchain_community.document_loaders.base import BaseLoader from langchain_core.documents import Document from open_webui.retrieval.loaders.tavily import TavilyLoader -from open_webui.retrieval.loaders.external import ExternalLoader +from open_webui.retrieval.loaders.external_web import ExternalWebLoader from open_webui.constants import ERROR_MESSAGES from open_webui.config import ( ENABLE_RAG_LOCAL_WEB_FETCH, @@ -39,7 +39,7 @@ from open_webui.config import ( EXTERNAL_WEB_LOADER_URL, EXTERNAL_WEB_LOADER_API_KEY, ) -from open_webui.env import SRC_LOG_LEVELS +from open_webui.env import SRC_LOG_LEVELS, AIOHTTP_CLIENT_SESSION_SSL log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -515,7 +515,9 @@ class SafeWebBaseLoader(WebBaseLoader): kwargs["ssl"] = False async with session.get( - url, **(self.requests_kwargs | kwargs) + url, + **(self.requests_kwargs | kwargs), + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as response: if self.raise_for_status: response.raise_for_status() @@ -628,7 +630,7 @@ def get_web_loader( web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value if WEB_LOADER_ENGINE.value == "external": - WebLoaderClass = ExternalLoader + WebLoaderClass = ExternalWebLoader web_loader_args["external_url"] = EXTERNAL_WEB_LOADER_URL.value web_loader_args["external_api_key"] = EXTERNAL_WEB_LOADER_API_KEY.value diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index 445857c88..7f67c65bd 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -38,6 +38,7 @@ from open_webui.config import ( from open_webui.constants import ERROR_MESSAGES from open_webui.env import ( + AIOHTTP_CLIENT_SESSION_SSL, AIOHTTP_CLIENT_TIMEOUT, ENV, SRC_LOG_LEVELS, @@ -326,6 +327,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): else {} ), }, + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as r: r.raise_for_status() @@ -381,6 +383,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): "Content-Type": "application/json", "xi-api-key": request.app.state.config.TTS_API_KEY, }, + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as r: r.raise_for_status() @@ -439,6 +442,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): "X-Microsoft-OutputFormat": output_format, }, data=data, + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as r: r.raise_for_status() diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py index 309862ed5..390c7d428 100644 --- a/backend/open_webui/routers/auths.py +++ b/backend/open_webui/routers/auths.py @@ -51,7 +51,7 @@ from open_webui.utils.access_control import get_permissions from typing import Optional, List -from ssl import CERT_REQUIRED, PROTOCOL_TLS +from ssl import CERT_NONE, CERT_REQUIRED, PROTOCOL_TLS if ENABLE_LDAP.value: from ldap3 import Server, Connection, NONE, Tls @@ -186,6 +186,9 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm): LDAP_APP_PASSWORD = request.app.state.config.LDAP_APP_PASSWORD LDAP_USE_TLS = request.app.state.config.LDAP_USE_TLS LDAP_CA_CERT_FILE = request.app.state.config.LDAP_CA_CERT_FILE + LDAP_VALIDATE_CERT = ( + CERT_REQUIRED if request.app.state.config.LDAP_VALIDATE_CERT else CERT_NONE + ) LDAP_CIPHERS = ( request.app.state.config.LDAP_CIPHERS if request.app.state.config.LDAP_CIPHERS @@ -197,7 +200,7 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm): try: tls = Tls( - validate=CERT_REQUIRED, + validate=LDAP_VALIDATE_CERT, version=PROTOCOL_TLS, 
ca_certs_file=LDAP_CA_CERT_FILE, ciphers=LDAP_CIPHERS, @@ -478,10 +481,6 @@ async def signup(request: Request, response: Response, form_data: SignupForm): "admin" if user_count == 0 else request.app.state.config.DEFAULT_USER_ROLE ) - if user_count == 0: - # Disable signup after the first user is created - request.app.state.config.ENABLE_SIGNUP = False - # The password passed to bcrypt must be 72 bytes or fewer. If it is longer, it will be truncated before hashing. if len(form_data.password.encode("utf-8")) > 72: raise HTTPException( @@ -541,6 +540,10 @@ async def signup(request: Request, response: Response, form_data: SignupForm): user.id, request.app.state.config.USER_PERMISSIONS ) + if user_count == 0: + # Disable signup after the first user is created + request.app.state.config.ENABLE_SIGNUP = False + return { "token": token, "token_type": "Bearer", @@ -696,6 +699,9 @@ async def get_admin_config(request: Request, user=Depends(get_admin_user)): "ENABLE_CHANNELS": request.app.state.config.ENABLE_CHANNELS, "ENABLE_NOTES": request.app.state.config.ENABLE_NOTES, "ENABLE_USER_WEBHOOKS": request.app.state.config.ENABLE_USER_WEBHOOKS, + "PENDING_USER_OVERLAY_TITLE": request.app.state.config.PENDING_USER_OVERLAY_TITLE, + "PENDING_USER_OVERLAY_CONTENT": request.app.state.config.PENDING_USER_OVERLAY_CONTENT, + "RESPONSE_WATERMARK": request.app.state.config.RESPONSE_WATERMARK, } @@ -713,6 +719,9 @@ class AdminConfig(BaseModel): ENABLE_CHANNELS: bool ENABLE_NOTES: bool ENABLE_USER_WEBHOOKS: bool + PENDING_USER_OVERLAY_TITLE: Optional[str] = None + PENDING_USER_OVERLAY_CONTENT: Optional[str] = None + RESPONSE_WATERMARK: Optional[str] = None @router.post("/admin/config") @@ -750,6 +759,15 @@ async def update_admin_config( request.app.state.config.ENABLE_USER_WEBHOOKS = form_data.ENABLE_USER_WEBHOOKS + request.app.state.config.PENDING_USER_OVERLAY_TITLE = ( + form_data.PENDING_USER_OVERLAY_TITLE + ) + request.app.state.config.PENDING_USER_OVERLAY_CONTENT = ( + form_data.PENDING_USER_OVERLAY_CONTENT + ) + + request.app.state.config.RESPONSE_WATERMARK = form_data.RESPONSE_WATERMARK + return { "SHOW_ADMIN_DETAILS": request.app.state.config.SHOW_ADMIN_DETAILS, "WEBUI_URL": request.app.state.config.WEBUI_URL, @@ -764,6 +782,9 @@ async def update_admin_config( "ENABLE_CHANNELS": request.app.state.config.ENABLE_CHANNELS, "ENABLE_NOTES": request.app.state.config.ENABLE_NOTES, "ENABLE_USER_WEBHOOKS": request.app.state.config.ENABLE_USER_WEBHOOKS, + "PENDING_USER_OVERLAY_TITLE": request.app.state.config.PENDING_USER_OVERLAY_TITLE, + "PENDING_USER_OVERLAY_CONTENT": request.app.state.config.PENDING_USER_OVERLAY_CONTENT, + "RESPONSE_WATERMARK": request.app.state.config.RESPONSE_WATERMARK, } @@ -779,6 +800,7 @@ class LdapServerConfig(BaseModel): search_filters: str = "" use_tls: bool = True certificate_path: Optional[str] = None + validate_cert: bool = True ciphers: Optional[str] = "ALL" @@ -796,6 +818,7 @@ async def get_ldap_server(request: Request, user=Depends(get_admin_user)): "search_filters": request.app.state.config.LDAP_SEARCH_FILTERS, "use_tls": request.app.state.config.LDAP_USE_TLS, "certificate_path": request.app.state.config.LDAP_CA_CERT_FILE, + "validate_cert": request.app.state.config.LDAP_VALIDATE_CERT, "ciphers": request.app.state.config.LDAP_CIPHERS, } @@ -831,6 +854,7 @@ async def update_ldap_server( request.app.state.config.LDAP_SEARCH_FILTERS = form_data.search_filters request.app.state.config.LDAP_USE_TLS = form_data.use_tls request.app.state.config.LDAP_CA_CERT_FILE = form_data.certificate_path 
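+    # validate_cert=False maps to ssl.CERT_NONE in ldap_auth and skips server certificate verification (e.g., for self-signed LDAPS certificates).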
+ request.app.state.config.LDAP_VALIDATE_CERT = form_data.validate_cert request.app.state.config.LDAP_CIPHERS = form_data.ciphers return { @@ -845,6 +869,7 @@ async def update_ldap_server( "search_filters": request.app.state.config.LDAP_SEARCH_FILTERS, "use_tls": request.app.state.config.LDAP_USE_TLS, "certificate_path": request.app.state.config.LDAP_CA_CERT_FILE, + "validate_cert": request.app.state.config.LDAP_VALIDATE_CERT, "ciphers": request.app.state.config.LDAP_CIPHERS, } diff --git a/backend/open_webui/routers/evaluations.py b/backend/open_webui/routers/evaluations.py index 36320b6fc..164f3c40b 100644 --- a/backend/open_webui/routers/evaluations.py +++ b/backend/open_webui/routers/evaluations.py @@ -74,13 +74,17 @@ class FeedbackUserResponse(FeedbackResponse): @router.get("/feedbacks/all", response_model=list[FeedbackUserResponse]) async def get_all_feedbacks(user=Depends(get_admin_user)): feedbacks = Feedbacks.get_all_feedbacks() - return [ - FeedbackUserResponse( - **feedback.model_dump(), - user=UserResponse(**Users.get_user_by_id(feedback.user_id).model_dump()), + + feedback_list = [] + for feedback in feedbacks: + user = Users.get_user_by_id(feedback.user_id) + feedback_list.append( + FeedbackUserResponse( + **feedback.model_dump(), + user=UserResponse(**user.model_dump()) if user else None, + ) ) - for feedback in feedbacks - ] + return feedback_list @router.delete("/feedbacks/all") @@ -92,12 +96,7 @@ async def delete_all_feedbacks(user=Depends(get_admin_user)): @router.get("/feedbacks/all/export", response_model=list[FeedbackModel]) async def get_all_feedbacks(user=Depends(get_admin_user)): feedbacks = Feedbacks.get_all_feedbacks() - return [ - FeedbackModel( - **feedback.model_dump(), user=Users.get_user_by_id(feedback.user_id) - ) - for feedback in feedbacks - ] + return feedbacks @router.get("/feedbacks/user", response_model=list[FeedbackUserResponse]) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 475905da1..ec76d7e63 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -95,6 +95,16 @@ def upload_file( unsanitized_filename = file.filename filename = os.path.basename(unsanitized_filename) + file_extension = os.path.splitext(filename)[1] + if request.app.state.config.ALLOWED_FILE_EXTENSIONS: + if file_extension not in request.app.state.config.ALLOWED_FILE_EXTENSIONS: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT( + f"File type {file_extension} is not allowed" + ), + ) + # replace filename with uuid id = str(uuid.uuid4()) name = filename @@ -125,33 +135,38 @@ def upload_file( ) if process: try: + if file.content_type: + if file.content_type.startswith( + ( + "audio/mpeg", + "audio/wav", + "audio/ogg", + "audio/x-m4a", + "audio/webm", + "video/webm", + ) + ): + file_path = Storage.get_file(file_path) + result = transcribe(request, file_path) - if file.content_type.startswith( - ( - "audio/mpeg", - "audio/wav", - "audio/ogg", - "audio/x-m4a", - "audio/webm", - "video/webm", + process_file( + request, + ProcessFileForm(file_id=id, content=result.get("text", "")), + user=user, + ) + elif file.content_type not in [ + "image/png", + "image/jpeg", + "image/gif", + "video/mp4", + "video/ogg", + "video/quicktime", + ]: + process_file(request, ProcessFileForm(file_id=id), user=user) + else: + log.info( + f"File type {file.content_type} is not provided, but trying to process anyway" ) - ): - file_path = Storage.get_file(file_path) - result = 
transcribe(request, file_path) - - process_file( - request, - ProcessFileForm(file_id=id, content=result.get("text", "")), - user=user, - ) - elif file.content_type not in [ - "image/png", - "image/jpeg", - "image/gif", - "video/mp4", - "video/ogg", - "video/quicktime", - ]: process_file(request, ProcessFileForm(file_id=id), user=user) file_item = Files.get_file_by_id(id=id) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 920130858..e6e55f4d3 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -10,7 +10,7 @@ from open_webui.models.knowledge import ( KnowledgeUserResponse, ) from open_webui.models.files import Files, FileModel, FileMetadataResponse -from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT +from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT from open_webui.routers.retrieval import ( process_file, ProcessFileForm, diff --git a/backend/open_webui/routers/memories.py b/backend/open_webui/routers/memories.py index 6d54c9c17..333e9ecc6 100644 --- a/backend/open_webui/routers/memories.py +++ b/backend/open_webui/routers/memories.py @@ -4,7 +4,7 @@ import logging from typing import Optional from open_webui.models.memories import Memories, MemoryModel -from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT +from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT from open_webui.utils.auth import get_verified_user from open_webui.env import SRC_LOG_LEVELS diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py index 790f7dece..85349339f 100644 --- a/backend/open_webui/routers/ollama.py +++ b/backend/open_webui/routers/ollama.py @@ -1585,7 +1585,9 @@ async def upload_model( if url_idx is None: url_idx = 0 ollama_url = request.app.state.config.OLLAMA_BASE_URLS[url_idx] - file_path = os.path.join(UPLOAD_DIR, file.filename) + + filename = os.path.basename(file.filename) + file_path = os.path.join(UPLOAD_DIR, filename) os.makedirs(UPLOAD_DIR, exist_ok=True) # --- P1: save file locally --- @@ -1630,13 +1632,13 @@ async def upload_model( os.remove(file_path) # Create model in ollama - model_name, ext = os.path.splitext(file.filename) + model_name, ext = os.path.splitext(filename) log.info(f"Created Model: {model_name}") # DEBUG create_payload = { "model": model_name, # Reference the file by its original name => the uploaded blob's digest - "files": {file.filename: f"sha256:{file_hash}"}, + "files": {filename: f"sha256:{file_hash}"}, } log.info(f"Model Payload: {create_payload}") # DEBUG @@ -1653,7 +1655,7 @@ async def upload_model( done_msg = { "done": True, "blob": f"sha256:{file_hash}", - "name": file.filename, + "name": filename, "model_created": model_name, } yield f"data: {json.dumps(done_msg)}\n\n" diff --git a/backend/open_webui/routers/pipelines.py b/backend/open_webui/routers/pipelines.py index f14002502..f80ea91f8 100644 --- a/backend/open_webui/routers/pipelines.py +++ b/backend/open_webui/routers/pipelines.py @@ -18,7 +18,7 @@ from pydantic import BaseModel from starlette.responses import FileResponse from typing import Optional -from open_webui.env import SRC_LOG_LEVELS +from open_webui.env import SRC_LOG_LEVELS, AIOHTTP_CLIENT_SESSION_SSL from open_webui.config import CACHE_DIR from open_webui.constants import ERROR_MESSAGES @@ -69,7 +69,10 @@ async def process_pipeline_inlet_filter(request, payload, user, models): async with aiohttp.ClientSession(trust_env=True) as session: for filter in 
sorted_filters: urlIdx = filter.get("urlIdx") - if urlIdx is None: + + try: + urlIdx = int(urlIdx) + except (TypeError, ValueError): + continue url = request.app.state.config.OPENAI_API_BASE_URLS[urlIdx] @@ -89,6 +92,7 @@ async def process_pipeline_inlet_filter(request, payload, user, models): f"{url}/{filter['id']}/filter/inlet", headers=headers, json=request_data, + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as response: payload = await response.json() response.raise_for_status() @@ -118,7 +122,10 @@ async def process_pipeline_outlet_filter(request, payload, user, models): async with aiohttp.ClientSession(trust_env=True) as session: for filter in sorted_filters: urlIdx = filter.get("urlIdx") - if urlIdx is None: + + try: + urlIdx = int(urlIdx) + except (TypeError, ValueError): + continue url = request.app.state.config.OPENAI_API_BASE_URLS[urlIdx] @@ -138,6 +145,7 @@ async def process_pipeline_outlet_filter(request, payload, user, models): f"{url}/{filter['id']}/filter/outlet", headers=headers, json=request_data, + ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as response: payload = await response.json() response.raise_for_status() @@ -197,8 +205,10 @@ async def upload_pipeline( user=Depends(get_admin_user), ): log.info(f"upload_pipeline: urlIdx={urlIdx}, filename={file.filename}") + filename = os.path.basename(file.filename) + # Check if the uploaded file is a python file - if not (file.filename and file.filename.endswith(".py")): + if not (filename and filename.endswith(".py")): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Only Python (.py) files are allowed.", @@ -206,7 +216,7 @@ async def upload_pipeline( upload_folder = f"{CACHE_DIR}/pipelines" os.makedirs(upload_folder, exist_ok=True) - file_path = os.path.join(upload_folder, file.filename) + file_path = os.path.join(upload_folder, filename) r = None try: diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index efefa12fc..1d0af3029 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -36,7 +36,7 @@ from open_webui.models.knowledge import Knowledges from open_webui.storage.provider import Storage -from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT +from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT # Document loaders from open_webui.retrieval.loaders.main import Loader @@ -352,10 +352,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): # Content extraction settings "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, + "EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL, + "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, + "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, @@ -492,10 +495,14 @@ class ConfigForm(BaseModel): # Content extraction settings CONTENT_EXTRACTION_ENGINE: Optional[str] = None PDF_EXTRACT_IMAGES:
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index efefa12fc..1d0af3029 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -36,7 +36,7 @@ from open_webui.models.knowledge import Knowledges

 from open_webui.storage.provider import Storage

-from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
+from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT

 # Document loaders
 from open_webui.retrieval.loaders.main import Loader
@@ -352,10 +352,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
         # Content extraction settings
         "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
         "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
+        "EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
+        "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
         "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
         "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
         "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
         "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
+        "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
         "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
         "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
         "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
@@ -492,10 +495,14 @@ class ConfigForm(BaseModel):
     # Content extraction settings
     CONTENT_EXTRACTION_ENGINE: Optional[str] = None
     PDF_EXTRACT_IMAGES: Optional[bool] = None
+    EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
+    EXTERNAL_DOCUMENT_LOADER_API_KEY: Optional[str] = None
+
     TIKA_SERVER_URL: Optional[str] = None
     DOCLING_SERVER_URL: Optional[str] = None
     DOCLING_OCR_ENGINE: Optional[str] = None
     DOCLING_OCR_LANG: Optional[str] = None
+    DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None
     DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None
     DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None
     MISTRAL_OCR_API_KEY: Optional[str] = None
@@ -581,6 +588,16 @@ async def update_rag_config(
         if form_data.PDF_EXTRACT_IMAGES is not None
         else request.app.state.config.PDF_EXTRACT_IMAGES
     )
+    request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = (
+        form_data.EXTERNAL_DOCUMENT_LOADER_URL
+        if form_data.EXTERNAL_DOCUMENT_LOADER_URL is not None
+        else request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL
+    )
+    request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = (
+        form_data.EXTERNAL_DOCUMENT_LOADER_API_KEY
+        if form_data.EXTERNAL_DOCUMENT_LOADER_API_KEY is not None
+        else request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY
+    )
     request.app.state.config.TIKA_SERVER_URL = (
         form_data.TIKA_SERVER_URL
         if form_data.TIKA_SERVER_URL is not None
@@ -601,6 +618,13 @@ async def update_rag_config(
         if form_data.DOCLING_OCR_LANG is not None
         else request.app.state.config.DOCLING_OCR_LANG
     )
+
+    request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = (
+        form_data.DOCLING_DO_PICTURE_DESCRIPTION
+        if form_data.DOCLING_DO_PICTURE_DESCRIPTION is not None
+        else request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION
+    )
+
     request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = (
         form_data.DOCUMENT_INTELLIGENCE_ENDPOINT
         if form_data.DOCUMENT_INTELLIGENCE_ENDPOINT is not None
@@ -809,10 +833,13 @@ async def update_rag_config(
         # Content extraction settings
         "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
         "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
+        "EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
+        "EXTERNAL_DOCUMENT_LOADER_API_KEY": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
         "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL,
         "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL,
         "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
         "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
+        "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
         "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
         "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
         "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
@@ -1129,10 +1156,13 @@ def process_file(
             file_path = Storage.get_file(file_path)
             loader = Loader(
                 engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE,
+                EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
+                EXTERNAL_DOCUMENT_LOADER_API_KEY=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY,
                 TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL,
                 DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL,
                 DOCLING_OCR_ENGINE=request.app.state.config.DOCLING_OCR_ENGINE,
                 DOCLING_OCR_LANG=request.app.state.config.DOCLING_OCR_LANG,
+                DOCLING_DO_PICTURE_DESCRIPTION=request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
                 PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
                 DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
                 DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
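The diff above wires EXTERNAL_DOCUMENT_LOADER_URL and EXTERNAL_DOCUMENT_LOADER_API_KEY through the config endpoints and into `Loader`, but the loader implementation itself lives in `retrieval/loaders/main.py` and is not shown here. As a hypothetical sketch only (the `/process` path, bearer auth, and `text` response field are assumptions, not the actual contract), an external extraction call could look like:

import requests

def load_via_external_loader(base_url: str, api_key: str, file_path: str) -> str:
    # Hypothetical client: POST the raw file, read extracted text back.
    with open(file_path, "rb") as f:
        response = requests.post(
            f"{base_url}/process",  # assumed endpoint path
            headers={"Authorization": f"Bearer {api_key}"},
            files={"file": f},
            timeout=60,
        )
    response.raise_for_status()
    return response.json().get("text", "")  # assumed response shape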
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 442dfba76..03ff781e4 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -651,7 +651,7 @@ def apply_params_to_form_data(form_data, model):
                 convert_logit_bias_input_to_json(params["logit_bias"])
             )
         except Exception as e:
-            print(f"Error parsing logit_bias: {e}")
+            log.exception(f"Error parsing logit_bias: {e}")

     return form_data
diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py
index 0bd82b577..f6004515f 100644
--- a/backend/open_webui/utils/oauth.py
+++ b/backend/open_webui/utils/oauth.py
@@ -41,6 +41,7 @@ from open_webui.config import (
 )
 from open_webui.constants import ERROR_MESSAGES, WEBHOOK_MESSAGES
 from open_webui.env import (
+    AIOHTTP_CLIENT_SESSION_SSL,
     WEBUI_NAME,
     WEBUI_AUTH_COOKIE_SAME_SITE,
     WEBUI_AUTH_COOKIE_SECURE,
@@ -305,8 +306,10 @@ class OAuthManager:
                     get_kwargs["headers"] = {
                         "Authorization": f"Bearer {access_token}",
                     }
-                async with aiohttp.ClientSession() as session:
-                    async with session.get(picture_url, **get_kwargs) as resp:
+                async with aiohttp.ClientSession(trust_env=True) as session:
+                    async with session.get(
+                        picture_url, **get_kwargs, ssl=AIOHTTP_CLIENT_SESSION_SSL
+                    ) as resp:
                         if resp.ok:
                             picture = await resp.read()
                             base64_encoded_picture = base64.b64encode(picture).decode(
@@ -371,7 +374,9 @@ class OAuthManager:
             headers = {"Authorization": f"Bearer {access_token}"}
             async with aiohttp.ClientSession(trust_env=True) as session:
                 async with session.get(
-                    "https://api.github.com/user/emails", headers=headers
+                    "https://api.github.com/user/emails",
+                    headers=headers,
+                    ssl=AIOHTTP_CLIENT_SESSION_SSL,
                 ) as resp:
                     if resp.ok:
                         emails = await resp.json()
diff --git a/backend/open_webui/utils/tools.py b/backend/open_webui/utils/tools.py
index 123ec5fb9..f0b37b605 100644
--- a/backend/open_webui/utils/tools.py
+++ b/backend/open_webui/utils/tools.py
@@ -37,6 +37,7 @@ from open_webui.models.tools import Tools
 from open_webui.models.users import UserModel
 from open_webui.utils.plugin import load_tool_module_by_id
 from open_webui.env import (
+    SRC_LOG_LEVELS,
     AIOHTTP_CLIENT_TIMEOUT_TOOL_SERVER_DATA,
     AIOHTTP_CLIENT_SESSION_TOOL_SERVER_SSL,
 )
@@ -44,6 +45,7 @@ from open_webui.env import (
 import copy

 log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["MODELS"])


 def get_async_tool_function_and_apply_extra_params(
@@ -477,7 +479,7 @@ async def get_tool_server_data(token: str, url: str) -> Dict[str, Any]:
         "specs": convert_openapi_to_tool_payload(res),
     }

-    print("Fetched data:", data)
+    log.info(f"Fetched data: {data}")
     return data
@@ -510,7 +512,7 @@ async def get_tool_servers_data(
     results = []
     for (idx, server, url, _), response in zip(server_entries, responses):
         if isinstance(response, Exception):
-            print(f"Failed to connect to {url} OpenAPI tool server")
+            log.error(f"Failed to connect to {url} OpenAPI tool server")
             continue

         results.append(
@@ -620,5 +622,5 @@ async def execute_tool_server(
     except Exception as err:
         error = str(err)
-        print("API Request Error:", error)
+        log.exception(f"API Request Error: {error}")
     return {"error": error}
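Beyond routing output through module loggers, the tools.py changes above fix a latent formatting bug: `print("msg:", value)` happily joins its arguments, but `logging` treats extra positional arguments as %-style substitution values, so `log.info("Fetched data:", data)` has no placeholder for `data` and the record fails to format. Either interpolation style works, which is why the calls above use f-strings:

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

data = {"specs": []}

# log.info("Fetched data:", data)  # broken: `data` becomes a %-style
#                                  # argument with no matching placeholder

log.info(f"Fetched data: {data}")   # eager f-string interpolation
log.info("Fetched data: %s", data)  # lazy interpolation; skipped entirely
                                    # when the level is disabled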
diff --git a/package.json b/package.json
index e7229fb5c..9ffa8298e 100644
--- a/package.json
+++ b/package.json
@@ -6,6 +6,7 @@
 		"dev": "npm run pyodide:fetch && vite dev --host",
 		"dev:5050": "npm run pyodide:fetch && vite dev --port 5050",
 		"build": "npm run pyodide:fetch && vite build",
+		"build:watch": "npm run pyodide:fetch && vite build --watch",
 		"preview": "vite preview",
 		"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
 		"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
diff --git a/src/app.css b/src/app.css
index 5cfdd8df0..925b9c52d 100644
--- a/src/app.css
+++ b/src/app.css
@@ -314,12 +314,20 @@ input[type='number'] {
 .ProseMirror p.is-editor-empty:first-child::before {
 	content: attr(data-placeholder);
 	float: left;
-	color: #adb5bd;
+	/* The color below is Tailwind's text-gray-600, which has
+	   sufficient contrast: https://tailwindcss.com/docs/color */
+	color: #676767;
 	pointer-events: none;

 	@apply line-clamp-1 absolute;
 }

+@media (prefers-color-scheme: dark) {
+	.ProseMirror p.is-editor-empty:first-child::before {
+		color: #757575;
+	}
+}
+
 .ai-autocompletion::after {
 	color: #a0a0a0;
diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte
index cc56356fa..498ff2c1e 100644
--- a/src/lib/components/admin/Settings/Documents.svelte
+++ b/src/lib/components/admin/Settings/Documents.svelte
@@ -124,6 +124,13 @@
 	};

 	const submitHandler = async () => {
+		if (
+			RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external' &&
+			RAGConfig.EXTERNAL_DOCUMENT_LOADER_URL === ''
+		) {
+			toast.error($i18n.t('External Document Loader URL required.'));
+			return;
+		}
 		if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika' && RAGConfig.TIKA_SERVER_URL === '') {
 			toast.error($i18n.t('Tika Server URL required.'));
 			return;
@@ -246,7 +253,7 @@
-
+
 					{$i18n.t('Content Extraction Engine')}
@@ -256,6 +263,7 @@
 						bind:value={RAGConfig.CONTENT_EXTRACTION_ENGINE}
 					>
+
@@ -275,11 +283,24 @@
+				{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external'}
+
+
+
+
 				{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika'}
@@ -288,27 +309,38 @@
 				{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling'}
+
+
+
+
+							{$i18n.t('Describe Pictures in Documents')}
+
+
+
+
+
 				{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
@@ -437,7 +469,7 @@ {#if embeddingEngine === 'openai'}
 						{$i18n.t('Top K')}
 						{$i18n.t('Top K Reranker')}
+
+
+			{$i18n.t('Pending User Overlay Title')}
+