From 4e8b3906821a9a10f4fd0038373291dff41b65cf Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Tue, 28 Jan 2025 23:03:15 -0600 Subject: [PATCH 01/96] Add RAG_WEB_LOADER + Playwright mode + improve stability of search --- backend/open_webui/config.py | 5 + backend/open_webui/main.py | 2 + backend/open_webui/retrieval/web/main.py | 4 + backend/open_webui/retrieval/web/utils.py | 179 +++++++++++++++++++--- backend/open_webui/routers/retrieval.py | 21 ++- backend/open_webui/utils/middleware.py | 27 ++-- backend/requirements.txt | 2 +- backend/start.sh | 9 ++ backend/start_windows.bat | 9 ++ pyproject.toml | 1 + 10 files changed, 220 insertions(+), 39 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index c37b831de..3cec6edd7 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1712,6 +1712,11 @@ RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), ) +RAG_WEB_LOADER = PersistentConfig( + "RAG_WEB_LOADER", + "rag.web.loader", + os.environ.get("RAG_WEB_LOADER", "safe_web") +) #################################### # Images diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 00270aabc..985624d81 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -129,6 +129,7 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, + RAG_WEB_LOADER, WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE, WHISPER_MODEL_DIR, @@ -526,6 +527,7 @@ app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_K app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS +app.state.config.RAG_WEB_LOADER = RAG_WEB_LOADER app.state.EMBEDDING_FUNCTION = None app.state.ef = None diff --git 
a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 1af8a70aa..28a749e7d 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -1,3 +1,5 @@ +import validators + from typing import Optional from urllib.parse import urlparse @@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list): filtered_results = [] for result in results: url = result.get("url") or result.get("link", "") + if not validators.url(url): + continue domain = urlparse(url).netloc if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): filtered_results.append(result) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index a322bbbfc..bdc626749 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -1,16 +1,21 @@ +import asyncio +from datetime import datetime, time, timedelta import socket +import ssl import urllib.parse +import certifi import validators -from typing import Union, Sequence, Iterator +from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator from langchain_community.document_loaders import ( WebBaseLoader, + PlaywrightURLLoader ) from langchain_core.documents import Document from open_webui.constants import ERROR_MESSAGES -from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS import logging @@ -42,6 +47,15 @@ def validate_url(url: Union[str, Sequence[str]]): else: return False +def safe_validate_urls(url: Sequence[str]) -> Sequence[str]: + valid_urls = [] + for u in url: + try: + if validate_url(u): + valid_urls.append(u) + except ValueError: + continue + return valid_urls def resolve_hostname(hostname): # Get address information @@ -53,6 +67,131 @@ def resolve_hostname(hostname): return ipv4_addresses, ipv6_addresses +def 
extract_metadata(soup, url): + metadata = { + "source": url + } + if title := soup.find("title"): + metadata["title"] = title.get_text() + if description := soup.find("meta", attrs={"name": "description"}): + metadata["description"] = description.get( + "content", "No description found." + ) + if html := soup.find("html"): + metadata["language"] = html.get("lang", "No language found.") + return metadata + +class SafePlaywrightURLLoader(PlaywrightURLLoader): + """Load HTML pages safely with Playwright, supporting SSL verification and rate limiting. + + Attributes: + urls (List[str]): List of URLs to load. + verify_ssl (bool): If True, verify SSL certificates. + requests_per_second (Optional[float]): Number of requests per second to limit to. + continue_on_failure (bool): If True, continue loading other URLs on failure. + headless (bool): If True, the browser will run in headless mode. + """ + + def __init__( + self, + urls: List[str], + verify_ssl: bool = True, + requests_per_second: Optional[float] = None, + continue_on_failure: bool = True, + headless: bool = True, + remove_selectors: Optional[List[str]] = None, + proxy: Optional[Dict[str, str]] = None + ): + """Initialize with additional safety parameters.""" + super().__init__( + urls=urls, + continue_on_failure=continue_on_failure, + headless=headless, + remove_selectors=remove_selectors, + proxy=proxy + ) + self.verify_ssl = verify_ssl + self.requests_per_second = requests_per_second + self.last_request_time = None + + def _verify_ssl_cert(self, url: str) -> bool: + """Verify SSL certificate for the given URL.""" + if not url.startswith("https://"): + return True + + try: + hostname = url.split("://")[-1].split("/")[0] + context = ssl.create_default_context(cafile=certifi.where()) + with context.wrap_socket(ssl.socket(), server_hostname=hostname) as s: + s.connect((hostname, 443)) + return True + except ssl.SSLError: + return False + except Exception as e: + log.warning(f"SSL verification failed for {url}: 
{str(e)}") + return False + + async def _wait_for_rate_limit(self): + """Wait to respect the rate limit if specified.""" + if self.requests_per_second and self.last_request_time: + min_interval = timedelta(seconds=1.0 / self.requests_per_second) + time_since_last = datetime.now() - self.last_request_time + if time_since_last < min_interval: + await asyncio.sleep((min_interval - time_since_last).total_seconds()) + self.last_request_time = datetime.now() + + def _sync_wait_for_rate_limit(self): + """Synchronous version of rate limit wait.""" + if self.requests_per_second and self.last_request_time: + min_interval = timedelta(seconds=1.0 / self.requests_per_second) + time_since_last = datetime.now() - self.last_request_time + if time_since_last < min_interval: + time.sleep((min_interval - time_since_last).total_seconds()) + self.last_request_time = datetime.now() + + async def _safe_process_url(self, url: str) -> bool: + """Perform safety checks before processing a URL.""" + if self.verify_ssl and not self._verify_ssl_cert(url): + raise ValueError(f"SSL certificate verification failed for {url}") + await self._wait_for_rate_limit() + return True + + def _safe_process_url_sync(self, url: str) -> bool: + """Synchronous version of safety checks.""" + if self.verify_ssl and not self._verify_ssl_cert(url): + raise ValueError(f"SSL certificate verification failed for {url}") + self._sync_wait_for_rate_limit() + return True + + async def alazy_load(self) -> AsyncIterator[Document]: + """Safely load URLs asynchronously.""" + parent_iterator = super().alazy_load() + + async for document in parent_iterator: + url = document.metadata["source"] + try: + await self._safe_process_url(url) + yield document + except Exception as e: + if self.continue_on_failure: + log.error(f"Error processing {url}, exception: {e}") + continue + raise e + + def lazy_load(self) -> Iterator[Document]: + """Safely load URLs synchronously.""" + parent_iterator = super().lazy_load() + + for document in 
parent_iterator: + url = document.metadata["source"] + try: + self._safe_process_url_sync(url) + yield document + except Exception as e: + if self.continue_on_failure: + log.error(f"Error processing {url}, exception: {e}") + continue + raise e class SafeWebBaseLoader(WebBaseLoader): """WebBaseLoader with enhanced error handling for URLs.""" @@ -65,15 +204,7 @@ class SafeWebBaseLoader(WebBaseLoader): text = soup.get_text(**self.bs_get_text_kwargs) # Build metadata - metadata = {"source": path} - if title := soup.find("title"): - metadata["title"] = title.get_text() - if description := soup.find("meta", attrs={"name": "description"}): - metadata["description"] = description.get( - "content", "No description found." - ) - if html := soup.find("html"): - metadata["language"] = html.get("lang", "No language found.") + metadata = extract_metadata(soup, path) yield Document(page_content=text, metadata=metadata) except Exception as e: @@ -87,11 +218,21 @@ def get_web_loader( requests_per_second: int = 2, ): # Check if the URL is valid - if not validate_url(urls): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - return SafeWebBaseLoader( - urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) + + if RAG_WEB_LOADER.value == "chromium": + log.info("Using SafePlaywrightURLLoader") + return SafePlaywrightURLLoader( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) + else: + log.info("Using SafeWebBaseLoader") + return SafeWebBaseLoader( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 2cffd9ead..e65a76050 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1238,9 +1238,11 @@ def 
search_web(request: Request, engine: str, query: str) -> list[SearchResult]: @router.post("/process/web/search") -def process_web_search( - request: Request, form_data: SearchForm, user=Depends(get_verified_user) +async def process_web_search( + request: Request, form_data: SearchForm, extra_params: dict, user=Depends(get_verified_user) ): + event_emitter = extra_params["__event_emitter__"] + try: logging.info( f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" @@ -1258,6 +1260,18 @@ def process_web_search( log.debug(f"web_results: {web_results}") + await event_emitter( + { + "type": "status", + "data": { + "action": "web_search", + "description": "Loading {{count}} sites...", + "urls": [result.link for result in web_results], + "done": False + }, + } + ) + try: collection_name = form_data.collection_name if collection_name == "" or collection_name is None: @@ -1271,7 +1285,8 @@ def process_web_search( verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, ) - docs = loader.load() + docs = [doc async for doc in loader.alazy_load()] + # docs = loader.load() save_docs_to_vector_db(request, docs, collection_name, overwrite=True) return { diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 6b2329be1..27e499e0c 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -419,21 +419,16 @@ async def chat_web_search_handler( try: - # Offload process_web_search to a separate thread - loop = asyncio.get_running_loop() - with ThreadPoolExecutor() as executor: - results = await loop.run_in_executor( - executor, - lambda: process_web_search( - request, - SearchForm( - **{ - "query": searchQuery, - } - ), - user, - ), - ) + results = await process_web_search( + request, + SearchForm( + **{ + "query": searchQuery, + } + ), + 
extra_params=extra_params, + user=user + ) if results: await event_emitter( @@ -441,7 +436,7 @@ async def chat_web_search_handler( "type": "status", "data": { "action": "web_search", - "description": "Searched {{count}} sites", + "description": "Loaded {{count}} sites", "query": searchQuery, "urls": results["filenames"], "done": True, diff --git a/backend/requirements.txt b/backend/requirements.txt index eecb9c4a5..0dd7b1a8a 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -46,7 +46,7 @@ chromadb==0.6.2 pymilvus==2.5.0 qdrant-client~=1.12.0 opensearch-py==2.7.1 - +playwright==1.49.1 transformers sentence-transformers==3.3.1 diff --git a/backend/start.sh b/backend/start.sh index a945acb62..ce56b1867 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -3,6 +3,15 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit +# Add conditional Playwright browser installation +if [[ "${RAG_WEB_LOADER,,}" == "chromium" ]]; then + echo "Installing Playwright browsers..." + playwright install chromium + playwright install-deps chromium + + python -c "import nltk; nltk.download('punkt_tab')" +fi + KEY_FILE=.webui_secret_key PORT="${PORT:-8080}" diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 3e8c6b97c..3b6446258 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -6,6 +6,15 @@ SETLOCAL ENABLEDELAYEDEXPANSION SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b +:: Add conditional Playwright browser installation +IF /I "%RAG_WEB_LOADER%" == "chromium" ( + echo Installing Playwright browsers... 
+ playwright install chromium + playwright install-deps chromium + + python -c "import nltk; nltk.download('punkt_tab')" +) + SET "KEY_FILE=.webui_secret_key" IF "%PORT%"=="" SET PORT=8080 IF "%HOST%"=="" SET HOST=0.0.0.0 diff --git a/pyproject.toml b/pyproject.toml index edd01db8f..c8ec0f497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "pymilvus==2.5.0", "qdrant-client~=1.12.0", "opensearch-py==2.7.1", + "playwright==1.49.1", "transformers", "sentence-transformers==3.3.1", From 2452e271cddccf0c835ae17f4505471eb41a4313 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Thu, 30 Jan 2025 20:31:31 -0600 Subject: [PATCH 02/96] Refine RAG_WEB_LOADER --- backend/open_webui/retrieval/web/utils.py | 34 +++++++++++------------ backend/start.sh | 2 +- backend/start_windows.bat | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index bdc626749..3c0c34074 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -5,6 +5,7 @@ import ssl import urllib.parse import certifi import validators +from collections import defaultdict from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator from langchain_community.document_loaders import ( @@ -211,28 +212,27 @@ class SafeWebBaseLoader(WebBaseLoader): # Log the error and continue with the next URL log.error(f"Error loading {path}: {e}") +RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) +RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader +RAG_WEB_LOADERS["safe_web"] = SafeWebBaseLoader def get_web_loader( urls: Union[str, Sequence[str]], verify_ssl: bool = True, requests_per_second: int = 2, ): - # Check if the URL is valid + # Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - if RAG_WEB_LOADER.value == "chromium": - 
log.info("Using SafePlaywrightURLLoader") - return SafePlaywrightURLLoader( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) - else: - log.info("Using SafeWebBaseLoader") - return SafeWebBaseLoader( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + # Get the appropriate WebLoader based on the configuration + WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] + web_loader = WebLoaderClass( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) + + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) + + return web_loader \ No newline at end of file diff --git a/backend/start.sh b/backend/start.sh index ce56b1867..2501f413f 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,7 +4,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation -if [[ "${RAG_WEB_LOADER,,}" == "chromium" ]]; then +if [[ "${RAG_WEB_LOADER,,}" == "playwright" ]]; then echo "Installing Playwright browsers..." playwright install chromium playwright install-deps chromium diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 3b6446258..0f2792cc0 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -7,7 +7,7 @@ SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation -IF /I "%RAG_WEB_LOADER%" == "chromium" ( +IF /I "%RAG_WEB_LOADER%" == "playwright" ( echo Installing Playwright browsers... 
playwright install chromium playwright install-deps chromium From 77ae73e659e6fea6da34c3ea913edb3dc4f037a9 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Thu, 30 Jan 2025 23:18:11 -0600 Subject: [PATCH 03/96] Adjust search event messages + translations --- backend/open_webui/routers/retrieval.py | 2 +- backend/open_webui/utils/middleware.py | 2 +- src/lib/components/chat/Messages/ResponseMessage.svelte | 1 + src/lib/i18n/locales/ar-BH/translation.json | 1 + src/lib/i18n/locales/bg-BG/translation.json | 1 + src/lib/i18n/locales/bn-BD/translation.json | 1 + src/lib/i18n/locales/ca-ES/translation.json | 1 + src/lib/i18n/locales/ceb-PH/translation.json | 1 + src/lib/i18n/locales/cs-CZ/translation.json | 1 + src/lib/i18n/locales/da-DK/translation.json | 1 + src/lib/i18n/locales/de-DE/translation.json | 1 + src/lib/i18n/locales/dg-DG/translation.json | 1 + src/lib/i18n/locales/el-GR/translation.json | 1 + src/lib/i18n/locales/en-GB/translation.json | 1 + src/lib/i18n/locales/en-US/translation.json | 1 + src/lib/i18n/locales/es-ES/translation.json | 1 + src/lib/i18n/locales/eu-ES/translation.json | 1 + src/lib/i18n/locales/fa-IR/translation.json | 1 + src/lib/i18n/locales/fi-FI/translation.json | 1 + src/lib/i18n/locales/fr-CA/translation.json | 1 + src/lib/i18n/locales/fr-FR/translation.json | 1 + src/lib/i18n/locales/he-IL/translation.json | 1 + src/lib/i18n/locales/hi-IN/translation.json | 1 + src/lib/i18n/locales/hr-HR/translation.json | 1 + src/lib/i18n/locales/hu-HU/translation.json | 1 + src/lib/i18n/locales/id-ID/translation.json | 1 + src/lib/i18n/locales/ie-GA/translation.json | 1 + src/lib/i18n/locales/it-IT/translation.json | 1 + src/lib/i18n/locales/ja-JP/translation.json | 1 + src/lib/i18n/locales/ka-GE/translation.json | 1 + src/lib/i18n/locales/ko-KR/translation.json | 1 + src/lib/i18n/locales/lt-LT/translation.json | 1 + src/lib/i18n/locales/ms-MY/translation.json | 1 + src/lib/i18n/locales/nb-NO/translation.json | 
1 + src/lib/i18n/locales/nl-NL/translation.json | 1 + src/lib/i18n/locales/pa-IN/translation.json | 1 + src/lib/i18n/locales/pl-PL/translation.json | 1 + src/lib/i18n/locales/pt-BR/translation.json | 1 + src/lib/i18n/locales/pt-PT/translation.json | 1 + src/lib/i18n/locales/ro-RO/translation.json | 1 + src/lib/i18n/locales/ru-RU/translation.json | 1 + src/lib/i18n/locales/sk-SK/translation.json | 1 + src/lib/i18n/locales/sr-RS/translation.json | 1 + src/lib/i18n/locales/sv-SE/translation.json | 1 + src/lib/i18n/locales/th-TH/translation.json | 1 + src/lib/i18n/locales/tk-TW/translation.json | 1 + src/lib/i18n/locales/tr-TR/translation.json | 1 + src/lib/i18n/locales/uk-UA/translation.json | 1 + src/lib/i18n/locales/ur-PK/translation.json | 1 + src/lib/i18n/locales/vi-VN/translation.json | 1 + src/lib/i18n/locales/zh-CN/translation.json | 1 + src/lib/i18n/locales/zh-TW/translation.json | 1 + 52 files changed, 52 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index e65a76050..507698084 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1265,7 +1265,7 @@ async def process_web_search( "type": "status", "data": { "action": "web_search", - "description": "Loading {{count}} sites...", + "description": "Loading {{count}} sites", "urls": [result.link for result in web_results], "done": False }, diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index dde34bec8..2a68d8d0a 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -436,7 +436,7 @@ async def chat_web_search_handler( "type": "status", "data": { "action": "web_search", - "description": "Loaded {{count}} sites", + "description": "Searched {{count}} sites", "query": searchQuery, "urls": results["filenames"], "done": True, diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte 
b/src/lib/components/chat/Messages/ResponseMessage.svelte index d6b31e6a0..9952f0f8e 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -580,6 +580,7 @@ : ''} text-gray-500 dark:text-gray-500 text-base line-clamp-1 text-wrap" > + {#if status?.description.includes('{{searchQuery}}')} {$i18n.t(status?.description, { searchQuery: status?.query diff --git a/src/lib/i18n/locales/ar-BH/translation.json b/src/lib/i18n/locales/ar-BH/translation.json index d115f4227..f2f9d25f2 100644 --- a/src/lib/i18n/locales/ar-BH/translation.json +++ b/src/lib/i18n/locales/ar-BH/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "يمكن أن تصدر بعض الأخطاء. لذلك يجب التحقق من المعلومات المهمة", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bg-BG/translation.json b/src/lib/i18n/locales/bg-BG/translation.json index f2eea288a..a4d06d389 100644 --- a/src/lib/i18n/locales/bg-BG/translation.json +++ b/src/lib/i18n/locales/bg-BG/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могат да правят грешки. Проверете важните данни.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bn-BD/translation.json b/src/lib/i18n/locales/bn-BD/translation.json index 18a270eb2..5ef08af65 100644 --- a/src/lib/i18n/locales/bn-BD/translation.json +++ b/src/lib/i18n/locales/bn-BD/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM ভুল করতে পারে। গুরুত্বপূর্ণ তথ্য যাচাই করে নিন।", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ca-ES/translation.json b/src/lib/i18n/locales/ca-ES/translation.json index 3eeef48d4..db3589a2d 100644 --- a/src/lib/i18n/locales/ca-ES/translation.json +++ b/src/lib/i18n/locales/ca-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escoltant...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Els models de llenguatge poden cometre errors. Verifica la informació important.", + "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Models locals", "Lost": "Perdut", diff --git a/src/lib/i18n/locales/ceb-PH/translation.json b/src/lib/i18n/locales/ceb-PH/translation.json index 9029fbae0..23a7c01ba 100644 --- a/src/lib/i18n/locales/ceb-PH/translation.json +++ b/src/lib/i18n/locales/ceb-PH/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Ang mga LLM mahimong masayop. ", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/cs-CZ/translation.json b/src/lib/i18n/locales/cs-CZ/translation.json index d07b3b6ec..6eefeb323 100644 --- a/src/lib/i18n/locales/cs-CZ/translation.json +++ b/src/lib/i18n/locales/cs-CZ/translation.json @@ -561,6 +561,7 @@ "Listening...": "Poslouchání...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM mohou dělat chyby. 
Ověřte si důležité informace.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokální modely", "Lost": "Ztracený", diff --git a/src/lib/i18n/locales/da-DK/translation.json b/src/lib/i18n/locales/da-DK/translation.json index dca76c74d..4f7cbd00d 100644 --- a/src/lib/i18n/locales/da-DK/translation.json +++ b/src/lib/i18n/locales/da-DK/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lytter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'er kan lave fejl. Bekræft vigtige oplysninger.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokale modeller", "Lost": "", diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json index f96e6bf08..bf934cedf 100644 --- a/src/lib/i18n/locales/de-DE/translation.json +++ b/src/lib/i18n/locales/de-DE/translation.json @@ -561,6 +561,7 @@ "Listening...": "Höre zu...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs können Fehler machen. Überprüfe wichtige Informationen.", + "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale Modelle", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/dg-DG/translation.json b/src/lib/i18n/locales/dg-DG/translation.json index aaca0565b..bfe1b7602 100644 --- a/src/lib/i18n/locales/dg-DG/translation.json +++ b/src/lib/i18n/locales/dg-DG/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs can make borks. Verify important info.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/el-GR/translation.json b/src/lib/i18n/locales/el-GR/translation.json index 8058615a0..dcc608769 100644 --- a/src/lib/i18n/locales/el-GR/translation.json +++ b/src/lib/i18n/locales/el-GR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Ακούγεται...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "Τα LLM μπορούν να κάνουν λάθη. Επαληθεύστε σημαντικές πληροφορίες.", + "Loading {{count}} sites": "", "Local": "Τοπικό", "Local Models": "Τοπικά Μοντέλα", "Lost": "Χαμένος", diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json index 3075e7c0f..108c1d9e8 100644 --- a/src/lib/i18n/locales/en-GB/translation.json +++ b/src/lib/i18n/locales/en-GB/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index 3075e7c0f..108c1d9e8 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/es-ES/translation.json b/src/lib/i18n/locales/es-ES/translation.json index d748943ca..4a1d42166 100644 --- a/src/lib/i18n/locales/es-ES/translation.json +++ b/src/lib/i18n/locales/es-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escuchando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Los LLM pueden cometer errores. Verifica la información importante.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos locales", "Lost": "", diff --git a/src/lib/i18n/locales/eu-ES/translation.json b/src/lib/i18n/locales/eu-ES/translation.json index 1e5da410f..334fc11f6 100644 --- a/src/lib/i18n/locales/eu-ES/translation.json +++ b/src/lib/i18n/locales/eu-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Entzuten...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMek akatsak egin ditzakete. 
Egiaztatu informazio garrantzitsua.", + "Loading {{count}} sites": "", "Local": "Lokala", "Local Models": "Modelo lokalak", "Lost": "Galduta", diff --git a/src/lib/i18n/locales/fa-IR/translation.json b/src/lib/i18n/locales/fa-IR/translation.json index cd647b8a7..512bb0dbb 100644 --- a/src/lib/i18n/locales/fa-IR/translation.json +++ b/src/lib/i18n/locales/fa-IR/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "مدل\u200cهای زبانی بزرگ می\u200cتوانند اشتباه کنند. اطلاعات مهم را راستی\u200cآزمایی کنید.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/fi-FI/translation.json b/src/lib/i18n/locales/fi-FI/translation.json index 399da492f..bbc0a2d9a 100644 --- a/src/lib/i18n/locales/fi-FI/translation.json +++ b/src/lib/i18n/locales/fi-FI/translation.json @@ -561,6 +561,7 @@ "Listening...": "Kuuntelee...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Kielimallit voivat tehdä virheitä. Tarkista tärkeät tiedot.", + "Loading {{count}} sites": "", "Local": "Paikallinen", "Local Models": "Paikalliset mallit", "Lost": "Mennyt", diff --git a/src/lib/i18n/locales/fr-CA/translation.json b/src/lib/i18n/locales/fr-CA/translation.json index 051780d71..3f16e3a61 100644 --- a/src/lib/i18n/locales/fr-CA/translation.json +++ b/src/lib/i18n/locales/fr-CA/translation.json @@ -561,6 +561,7 @@ "Listening...": "En train d'écouter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. 
Vérifiez les informations importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modèles locaux", "Lost": "", diff --git a/src/lib/i18n/locales/fr-FR/translation.json b/src/lib/i18n/locales/fr-FR/translation.json index 62bdd8139..e8fe232d1 100644 --- a/src/lib/i18n/locales/fr-FR/translation.json +++ b/src/lib/i18n/locales/fr-FR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Écoute en cours...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", + "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Modèles locaux", "Lost": "Perdu", diff --git a/src/lib/i18n/locales/he-IL/translation.json b/src/lib/i18n/locales/he-IL/translation.json index 17756cbb0..d54937e0f 100644 --- a/src/lib/i18n/locales/he-IL/translation.json +++ b/src/lib/i18n/locales/he-IL/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "מודלים בשפה טבעית יכולים לטעות. אמת מידע חשוב.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hi-IN/translation.json b/src/lib/i18n/locales/hi-IN/translation.json index ea039a59d..9cd2a5899 100644 --- a/src/lib/i18n/locales/hi-IN/translation.json +++ b/src/lib/i18n/locales/hi-IN/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "एलएलएम गलतियाँ कर सकते हैं। महत्वपूर्ण जानकारी सत्यापित करें.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hr-HR/translation.json b/src/lib/i18n/locales/hr-HR/translation.json index c9e293389..72826f0d9 100644 --- a/src/lib/i18n/locales/hr-HR/translation.json +++ b/src/lib/i18n/locales/hr-HR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Slušam...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM-ovi mogu pogriješiti. Provjerite važne informacije.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalni modeli", "Lost": "", diff --git a/src/lib/i18n/locales/hu-HU/translation.json b/src/lib/i18n/locales/hu-HU/translation.json index 0b9e15140..7b4e38819 100644 --- a/src/lib/i18n/locales/hu-HU/translation.json +++ b/src/lib/i18n/locales/hu-HU/translation.json @@ -561,6 +561,7 @@ "Listening...": "Hallgatás...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Az LLM-ek hibázhatnak. Ellenőrizze a fontos információkat.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Helyi modellek", "Lost": "Elveszett", diff --git a/src/lib/i18n/locales/id-ID/translation.json b/src/lib/i18n/locales/id-ID/translation.json index 9c4d477a0..4d9a9e8a1 100644 --- a/src/lib/i18n/locales/id-ID/translation.json +++ b/src/lib/i18n/locales/id-ID/translation.json @@ -561,6 +561,7 @@ "Listening...": "Mendengarkan", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM dapat membuat kesalahan. Verifikasi informasi penting.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Lokal", "Lost": "", diff --git a/src/lib/i18n/locales/ie-GA/translation.json b/src/lib/i18n/locales/ie-GA/translation.json index a07d36065..21e6005ed 100644 --- a/src/lib/i18n/locales/ie-GA/translation.json +++ b/src/lib/i18n/locales/ie-GA/translation.json @@ -561,6 +561,7 @@ "Listening...": "Éisteacht...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Is féidir le LLManna botúin a dhéanamh. 
Fíoraigh faisnéis thábhachtach.", + "Loading {{count}} sites": "", "Local": "Áitiúil", "Local Models": "Múnlaí Áitiúla", "Lost": "Cailleadh", diff --git a/src/lib/i18n/locales/it-IT/translation.json b/src/lib/i18n/locales/it-IT/translation.json index 2fa94bace..97b0a5e44 100644 --- a/src/lib/i18n/locales/it-IT/translation.json +++ b/src/lib/i18n/locales/it-IT/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Gli LLM possono commettere errori. Verifica le informazioni importanti.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ja-JP/translation.json b/src/lib/i18n/locales/ja-JP/translation.json index 492acce1b..ca1f5d1ff 100644 --- a/src/lib/i18n/locales/ja-JP/translation.json +++ b/src/lib/i18n/locales/ja-JP/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM は間違いを犯す可能性があります。重要な情報を検証してください。", + "Loading {{count}} sites": "", "Local": "", "Local Models": "ローカルモデル", "Lost": "", diff --git a/src/lib/i18n/locales/ka-GE/translation.json b/src/lib/i18n/locales/ka-GE/translation.json index b26aa6c73..2d93798e5 100644 --- a/src/lib/i18n/locales/ka-GE/translation.json +++ b/src/lib/i18n/locales/ka-GE/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "შესაძლოა LLM-ებმა შეცდომები დაუშვან. გადაამოწმეთ მნიშვნელოვანი ინფორმაცია.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ko-KR/translation.json b/src/lib/i18n/locales/ko-KR/translation.json index ff33f8b96..c34103efa 100644 --- a/src/lib/i18n/locales/ko-KR/translation.json +++ b/src/lib/i18n/locales/ko-KR/translation.json @@ -561,6 +561,7 @@ "Listening...": "듣는 중...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM에 오류가 있을 수 있습니다. 중요한 정보는 확인이 필요합니다.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "로컬 모델", "Lost": "패배", diff --git a/src/lib/i18n/locales/lt-LT/translation.json b/src/lib/i18n/locales/lt-LT/translation.json index b562d3f7c..e21ed8f80 100644 --- a/src/lib/i18n/locales/lt-LT/translation.json +++ b/src/lib/i18n/locales/lt-LT/translation.json @@ -561,6 +561,7 @@ "Listening...": "Klausoma...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Dideli kalbos modeliai gali klysti. Patikrinkite atsakymų teisingumą.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalūs modeliai", "Lost": "", diff --git a/src/lib/i18n/locales/ms-MY/translation.json b/src/lib/i18n/locales/ms-MY/translation.json index 950a065af..a37f71376 100644 --- a/src/lib/i18n/locales/ms-MY/translation.json +++ b/src/lib/i18n/locales/ms-MY/translation.json @@ -561,6 +561,7 @@ "Listening...": "Mendengar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM boleh membuat kesilapan. Sahkan maklumat penting", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Tempatan", "Lost": "", diff --git a/src/lib/i18n/locales/nb-NO/translation.json b/src/lib/i18n/locales/nb-NO/translation.json index 1ce3a9aa9..9619ad9e1 100644 --- a/src/lib/i18n/locales/nb-NO/translation.json +++ b/src/lib/i18n/locales/nb-NO/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lytter ...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Språkmodeller kan gjøre feil. 
Kontroller viktige opplysninger.", + "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale modeller", "Lost": "Tapt", diff --git a/src/lib/i18n/locales/nl-NL/translation.json b/src/lib/i18n/locales/nl-NL/translation.json index 68764d8a8..dd82213ed 100644 --- a/src/lib/i18n/locales/nl-NL/translation.json +++ b/src/lib/i18n/locales/nl-NL/translation.json @@ -561,6 +561,7 @@ "Listening...": "Aan het luisteren...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs kunnen fouten maken. Verifieer belangrijke informatie.", + "Loading {{count}} sites": "", "Local": "Lokaal", "Local Models": "Lokale modellen", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/pa-IN/translation.json b/src/lib/i18n/locales/pa-IN/translation.json index 58e8bf339..59826ad7c 100644 --- a/src/lib/i18n/locales/pa-IN/translation.json +++ b/src/lib/i18n/locales/pa-IN/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs ਗਲਤੀਆਂ ਕਰ ਸਕਦੇ ਹਨ। ਮਹੱਤਵਪੂਰਨ ਜਾਣਕਾਰੀ ਦੀ ਪੁਸ਼ਟੀ ਕਰੋ।", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pl-PL/translation.json b/src/lib/i18n/locales/pl-PL/translation.json index 64168bfe3..aaf9c2c8c 100644 --- a/src/lib/i18n/locales/pl-PL/translation.json +++ b/src/lib/i18n/locales/pl-PL/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMy mogą popełniać błędy. Zweryfikuj ważne informacje.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pt-BR/translation.json b/src/lib/i18n/locales/pt-BR/translation.json index 4c416f409..3773f4314 100644 --- a/src/lib/i18n/locales/pt-BR/translation.json +++ b/src/lib/i18n/locales/pt-BR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escutando...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "Perdeu", diff --git a/src/lib/i18n/locales/pt-PT/translation.json b/src/lib/i18n/locales/pt-PT/translation.json index 092669629..864fa90d5 100644 --- a/src/lib/i18n/locales/pt-PT/translation.json +++ b/src/lib/i18n/locales/pt-PT/translation.json @@ -561,6 +561,7 @@ "Listening...": "A escutar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "", diff --git a/src/lib/i18n/locales/ro-RO/translation.json b/src/lib/i18n/locales/ro-RO/translation.json index 01994c631..0de81364b 100644 --- a/src/lib/i18n/locales/ro-RO/translation.json +++ b/src/lib/i18n/locales/ro-RO/translation.json @@ -561,6 +561,7 @@ "Listening...": "Ascult...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-urile pot face greșeli. Verificați informațiile importante.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modele Locale", "Lost": "Pierdut", diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json index b4d6aa916..34ef2a98a 100644 --- a/src/lib/i18n/locales/ru-RU/translation.json +++ b/src/lib/i18n/locales/ru-RU/translation.json @@ -561,6 +561,7 @@ "Listening...": "Слушаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могут допускать ошибки. 
Проверяйте важную информацию.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Локальные модели", "Lost": "", diff --git a/src/lib/i18n/locales/sk-SK/translation.json b/src/lib/i18n/locales/sk-SK/translation.json index 29d8622ea..f720bfed8 100644 --- a/src/lib/i18n/locales/sk-SK/translation.json +++ b/src/lib/i18n/locales/sk-SK/translation.json @@ -561,6 +561,7 @@ "Listening...": "Počúvanie...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM môžu robiť chyby. Overte si dôležité informácie.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokálne modely", "Lost": "Stratený", diff --git a/src/lib/i18n/locales/sr-RS/translation.json b/src/lib/i18n/locales/sr-RS/translation.json index 8dc255655..a6d13b4dd 100644 --- a/src/lib/i18n/locales/sr-RS/translation.json +++ b/src/lib/i18n/locales/sr-RS/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "ВЈМ-ови (LLM-ови) могу правити грешке. Проверите важне податке.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "Пораза", diff --git a/src/lib/i18n/locales/sv-SE/translation.json b/src/lib/i18n/locales/sv-SE/translation.json index 0abc82341..ae936e3ee 100644 --- a/src/lib/i18n/locales/sv-SE/translation.json +++ b/src/lib/i18n/locales/sv-SE/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lyssnar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM:er kan göra misstag. Granska viktig information.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokala modeller", "Lost": "", diff --git a/src/lib/i18n/locales/th-TH/translation.json b/src/lib/i18n/locales/th-TH/translation.json index f1f23478b..a57eaa3b4 100644 --- a/src/lib/i18n/locales/th-TH/translation.json +++ b/src/lib/i18n/locales/th-TH/translation.json @@ -561,6 +561,7 @@ "Listening...": "กำลังฟัง...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLMs สามารถทำผิดพลาดได้ ตรวจสอบข้อมูลสำคัญ", + "Loading {{count}} sites": "", "Local": "", "Local Models": "โมเดลท้องถิ่น", "Lost": "", diff --git a/src/lib/i18n/locales/tk-TW/translation.json b/src/lib/i18n/locales/tk-TW/translation.json index 3075e7c0f..108c1d9e8 100644 --- a/src/lib/i18n/locales/tk-TW/translation.json +++ b/src/lib/i18n/locales/tk-TW/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/tr-TR/translation.json b/src/lib/i18n/locales/tr-TR/translation.json index e6d05dfe1..892db13e6 100644 --- a/src/lib/i18n/locales/tr-TR/translation.json +++ b/src/lib/i18n/locales/tr-TR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Dinleniyor...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'ler hata yapabilir. Önemli bilgileri doğrulayın.", + "Loading {{count}} sites": "", "Local": "Yerel", "Local Models": "Yerel Modeller", "Lost": "Kayıp", diff --git a/src/lib/i18n/locales/uk-UA/translation.json b/src/lib/i18n/locales/uk-UA/translation.json index 449958c2b..c971d0049 100644 --- a/src/lib/i18n/locales/uk-UA/translation.json +++ b/src/lib/i18n/locales/uk-UA/translation.json @@ -561,6 +561,7 @@ "Listening...": "Слухаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs можуть помилятися. Перевірте важливу інформацію.", + "Loading {{count}} sites": "", "Local": "Локальний", "Local Models": "Локальні моделі", "Lost": "Втрачене", diff --git a/src/lib/i18n/locales/ur-PK/translation.json b/src/lib/i18n/locales/ur-PK/translation.json index 437d943ae..31662f695 100644 --- a/src/lib/i18n/locales/ur-PK/translation.json +++ b/src/lib/i18n/locales/ur-PK/translation.json @@ -561,6 +561,7 @@ "Listening...": "سن رہے ہیں...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "ایل ایل ایم غلطیاں کر سکتے ہیں اہم معلومات کی تصدیق کریں", + "Loading {{count}} sites": "", "Local": "", "Local Models": "مقامی ماڈلز", "Lost": "گم شدہ", diff --git a/src/lib/i18n/locales/vi-VN/translation.json b/src/lib/i18n/locales/vi-VN/translation.json index c786b67a7..f6f8db552 100644 --- a/src/lib/i18n/locales/vi-VN/translation.json +++ b/src/lib/i18n/locales/vi-VN/translation.json @@ -561,6 +561,7 @@ "Listening...": "Đang nghe...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Hệ thống có thể tạo ra nội dung không chính xác hoặc sai. Hãy kiểm chứng kỹ lưỡng thông tin trước khi tiếp nhận và sử dụng.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index da26f9cc9..9d8149471 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -561,6 +561,7 @@ "Listening...": "正在倾听...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大语言模型可能会生成误导性错误信息,请对关键信息加以验证。", + "Loading {{count}} sites": "", "Local": "本地", "Local Models": "本地模型", "Lost": "落败", diff --git a/src/lib/i18n/locales/zh-TW/translation.json b/src/lib/i18n/locales/zh-TW/translation.json index 81db88766..fb22677fa 100644 --- a/src/lib/i18n/locales/zh-TW/translation.json +++ b/src/lib/i18n/locales/zh-TW/translation.json @@ -561,6 +561,7 @@ "Listening...": "正在聆聽...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. 
Verify important information.": "大型語言模型可能會出錯。請驗證重要資訊。", + "Loading {{count}} sites": "", "Local": "本機", "Local Models": "本機模型", "Lost": "已遺失", From a84e488a4ea681c580a2b9cca22fe176f8c0014c Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:58:28 -0600 Subject: [PATCH 04/96] Fix playwright in docker by updating unstructured --- backend/open_webui/retrieval/web/utils.py | 6 +++--- backend/requirements.txt | 2 +- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 3c0c34074..0568c795c 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -175,7 +175,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception(e, "Error loading %s", url) continue raise e @@ -190,7 +190,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception(e, "Error loading %s", url) continue raise e @@ -210,7 +210,7 @@ class SafeWebBaseLoader(WebBaseLoader): yield Document(page_content=text, metadata=metadata) except Exception as e: # Log the error and continue with the next URL - log.error(f"Error loading {path}: {e}") + log.exception(e, "Error loading %s", path) RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader diff --git a/backend/requirements.txt b/backend/requirements.txt index bb124bf11..cf5cb4a2f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -60,7 +60,7 @@ fpdf2==2.8.2 pymdown-extensions==10.11.2 docx2txt==0.8 python-pptx==1.0.0 -unstructured==0.15.9 +unstructured==0.16.17 nltk==3.9.1 Markdown==3.7 
pypandoc==1.13 diff --git a/pyproject.toml b/pyproject.toml index 41c79ddb8..6e7f607b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dependencies = [ "pymdown-extensions==10.11.2", "docx2txt==0.8", "python-pptx==1.0.0", - "unstructured==0.15.9", + "unstructured==0.16.17", "nltk==3.9.1", "Markdown==3.7", "pypandoc==1.13", From 8da33721d563754becd0d03bf86605441e0bd9e3 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sun, 2 Feb 2025 17:58:09 -0600 Subject: [PATCH 05/96] Support PLAYWRIGHT_WS_URI --- backend/open_webui/config.py | 6 ++ backend/open_webui/main.py | 2 + backend/open_webui/retrieval/web/utils.py | 121 ++++++++++++++-------- backend/start.sh | 8 +- backend/start_windows.bat | 8 +- 5 files changed, 97 insertions(+), 48 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 278f50663..80e1e7ab2 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1724,6 +1724,12 @@ RAG_WEB_LOADER = PersistentConfig( os.environ.get("RAG_WEB_LOADER", "safe_web") ) +PLAYWRIGHT_WS_URI = PersistentConfig( + "PLAYWRIGHT_WS_URI", + "rag.web.loader.playwright.ws.uri", + os.environ.get("PLAYWRIGHT_WS_URI", None) +) + #################################### # Images #################################### diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index c5dfad047..fd8a4c957 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -129,6 +129,7 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, + PLAYWRIGHT_WS_URI, RAG_WEB_LOADER, WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE, @@ -528,6 +529,7 @@ app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_K app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS 
app.state.config.RAG_WEB_LOADER = RAG_WEB_LOADER +app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI app.state.EMBEDDING_FUNCTION = None app.state.ef = None diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 0568c795c..3c77402c3 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -16,7 +16,7 @@ from langchain_core.documents import Document from open_webui.constants import ERROR_MESSAGES -from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, RAG_WEB_LOADER +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS import logging @@ -83,7 +83,7 @@ def extract_metadata(soup, url): return metadata class SafePlaywrightURLLoader(PlaywrightURLLoader): - """Load HTML pages safely with Playwright, supporting SSL verification and rate limiting. + """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection. Attributes: urls (List[str]): List of URLs to load. @@ -91,6 +91,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): requests_per_second (Optional[float]): Number of requests per second to limit to. continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. + playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. 
""" def __init__( @@ -101,19 +102,80 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): continue_on_failure: bool = True, headless: bool = True, remove_selectors: Optional[List[str]] = None, - proxy: Optional[Dict[str, str]] = None + proxy: Optional[Dict[str, str]] = None, + playwright_ws_url: Optional[str] = None ): - """Initialize with additional safety parameters.""" + """Initialize with additional safety parameters and remote browser support.""" + # We'll set headless to False if using playwright_ws_url since it's handled by the remote browser super().__init__( urls=urls, continue_on_failure=continue_on_failure, - headless=headless, + headless=headless if playwright_ws_url is None else False, remove_selectors=remove_selectors, proxy=proxy ) self.verify_ssl = verify_ssl self.requests_per_second = requests_per_second self.last_request_time = None + self.playwright_ws_url = playwright_ws_url + + def lazy_load(self) -> Iterator[Document]: + """Safely load URLs synchronously with support for remote browser.""" + from playwright.sync_api import sync_playwright + + with sync_playwright() as p: + # Use remote browser if ws_endpoint is provided, otherwise use local browser + if self.playwright_ws_url: + browser = p.chromium.connect(self.playwright_ws_url) + else: + browser = p.chromium.launch(headless=self.headless, proxy=self.proxy) + + for url in self.urls: + try: + self._safe_process_url_sync(url) + page = browser.new_page() + response = page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + + text = self.evaluator.evaluate(page, browser, response) + metadata = {"source": url} + yield Document(page_content=text, metadata=metadata) + except Exception as e: + if self.continue_on_failure: + log.exception(e, "Error loading %s", url) + continue + raise e + browser.close() + + async def alazy_load(self) -> AsyncIterator[Document]: + """Safely load URLs asynchronously with support for remote browser.""" + from 
playwright.async_api import async_playwright + + async with async_playwright() as p: + # Use remote browser if ws_endpoint is provided, otherwise use local browser + if self.playwright_ws_url: + browser = await p.chromium.connect(self.playwright_ws_url) + else: + browser = await p.chromium.launch(headless=self.headless, proxy=self.proxy) + + for url in self.urls: + try: + await self._safe_process_url(url) + page = await browser.new_page() + response = await page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + + text = await self.evaluator.evaluate_async(page, browser, response) + metadata = {"source": url} + yield Document(page_content=text, metadata=metadata) + except Exception as e: + if self.continue_on_failure: + log.exception(e, "Error loading %s", url) + continue + raise e + await browser.close() def _verify_ssl_cert(self, url: str) -> bool: """Verify SSL certificate for the given URL.""" @@ -164,36 +226,6 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): self._sync_wait_for_rate_limit() return True - async def alazy_load(self) -> AsyncIterator[Document]: - """Safely load URLs asynchronously.""" - parent_iterator = super().alazy_load() - - async for document in parent_iterator: - url = document.metadata["source"] - try: - await self._safe_process_url(url) - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(e, "Error loading %s", url) - continue - raise e - - def lazy_load(self) -> Iterator[Document]: - """Safely load URLs synchronously.""" - parent_iterator = super().lazy_load() - - for document in parent_iterator: - url = document.metadata["source"] - try: - self._safe_process_url_sync(url) - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(e, "Error loading %s", url) - continue - raise e - class SafeWebBaseLoader(WebBaseLoader): """WebBaseLoader with enhanced error handling for URLs.""" @@ -224,14 +256,19 @@ def get_web_loader( # 
Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - # Get the appropriate WebLoader based on the configuration + web_loader_args = { + "urls": safe_urls, + "verify_ssl": verify_ssl, + "requests_per_second": requests_per_second, + "continue_on_failure": True + } + + if PLAYWRIGHT_WS_URI.value: + web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value + + # Create the appropriate WebLoader based on the configuration WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] - web_loader = WebLoaderClass( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + web_loader = WebLoaderClass(**web_loader_args) log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) diff --git a/backend/start.sh b/backend/start.sh index 2501f413f..3b08cf549 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -5,9 +5,11 @@ cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation if [[ "${RAG_WEB_LOADER,,}" == "playwright" ]]; then - echo "Installing Playwright browsers..." - playwright install chromium - playwright install-deps chromium + if [[ -z "${PLAYWRIGHT_WS_URI}" ]]; then + echo "Installing Playwright browsers..." + playwright install chromium + playwright install-deps chromium + fi python -c "import nltk; nltk.download('punkt_tab')" fi diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 0f2792cc0..036e1f721 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -8,9 +8,11 @@ cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation IF /I "%RAG_WEB_LOADER%" == "playwright" ( - echo Installing Playwright browsers... - playwright install chromium - playwright install-deps chromium + IF "%PLAYWRIGHT_WS_URI%" == "" ( + echo Installing Playwright browsers... 
+ playwright install chromium + playwright install-deps chromium + ) python -c "import nltk; nltk.download('punkt_tab')" ) From c3df481b22d8bc13a7deb045e94b0bcf4235224e Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sun, 2 Feb 2025 19:44:40 -0600 Subject: [PATCH 06/96] Introduce docker-compose.playwright.yaml + run-compose update --- backend/requirements.txt | 2 +- docker-compose.playwright.yaml | 10 ++++++++++ run-compose.sh | 9 +++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 docker-compose.playwright.yaml diff --git a/backend/requirements.txt b/backend/requirements.txt index cf5cb4a2f..b08c17677 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -46,7 +46,7 @@ chromadb==0.6.2 pymilvus==2.5.0 qdrant-client~=1.12.0 opensearch-py==2.7.1 -playwright==1.49.1 +playwright==1.49.1 # Caution: version must match docker-compose.playwright.yaml transformers sentence-transformers==3.3.1 diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml new file mode 100644 index 000000000..0a4bb3f76 --- /dev/null +++ b/docker-compose.playwright.yaml @@ -0,0 +1,10 @@ +services: + playwright: + image: mcr.microsoft.com/playwright:v1.49.1-noble # Version must match requirements.txt + container_name: playwright + command: npx -y playwright@1.49.1 run-server --port 3000 --host 0.0.0.0 + + open-webui: + environment: + - 'RAG_WEB_LOADER=playwright' + - 'PLAYWRIGHT_WS_URI=ws://playwright:3000' \ No newline at end of file diff --git a/run-compose.sh b/run-compose.sh index 21574e959..4fafedc6f 100755 --- a/run-compose.sh +++ b/run-compose.sh @@ -74,6 +74,7 @@ usage() { echo " --enable-api[port=PORT] Enable API and expose it on the specified port." echo " --webui[port=PORT] Set the port for the web user interface." echo " --data[folder=PATH] Bind mount for ollama data folder (by default will create the 'ollama' volume)." + echo " --playwright Enable Playwright support for web scraping." 
echo " --build Build the docker image before running the compose project." echo " --drop Drop the compose project." echo " -q, --quiet Run script in headless mode." @@ -100,6 +101,7 @@ webui_port=3000 headless=false build_image=false kill_compose=false +enable_playwright=false # Function to extract value from the parameter extract_value() { @@ -129,6 +131,9 @@ while [[ $# -gt 0 ]]; do value=$(extract_value "$key") data_dir=${value:-"./ollama-data"} ;; + --playwright) + enable_playwright=true + ;; --drop) kill_compose=true ;; @@ -182,6 +187,9 @@ else DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml" export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable fi + if [[ $enable_playwright == true ]]; then + DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.playwright.yaml" + fi if [[ -n $webui_port ]]; then export OPEN_WEBUI_PORT=$webui_port # Set OPEN_WEBUI_PORT environment variable fi @@ -201,6 +209,7 @@ echo -e " ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}" echo -e " ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}" echo -e " ${GREEN}${BOLD}Data Folder:${NC} ${data_dir:-Using ollama volume}" echo -e " ${GREEN}${BOLD}WebUI Port:${NC} $webui_port" +echo -e " ${GREEN}${BOLD}Playwright:${NC} ${enable_playwright:-false}" echo if [[ $headless == true ]]; then From 1b581b714f6749e51bf17c49434976a0c57900c6 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Mon, 3 Feb 2025 18:47:26 -0600 Subject: [PATCH 07/96] Moving code out of playwright branch --- backend/open_webui/retrieval/web/main.py | 4 ---- backend/open_webui/retrieval/web/utils.py | 19 +++++-------------- backend/open_webui/routers/retrieval.py | 16 +--------------- backend/open_webui/utils/middleware.py | 1 - .../chat/Messages/ResponseMessage.svelte | 1 - src/lib/i18n/locales/ar-BH/translation.json | 1 - src/lib/i18n/locales/bg-BG/translation.json | 1 - src/lib/i18n/locales/bn-BD/translation.json | 1 - 
src/lib/i18n/locales/ca-ES/translation.json | 1 - src/lib/i18n/locales/ceb-PH/translation.json | 1 - src/lib/i18n/locales/cs-CZ/translation.json | 1 - src/lib/i18n/locales/da-DK/translation.json | 1 - src/lib/i18n/locales/de-DE/translation.json | 1 - src/lib/i18n/locales/dg-DG/translation.json | 1 - src/lib/i18n/locales/el-GR/translation.json | 1 - src/lib/i18n/locales/en-GB/translation.json | 1 - src/lib/i18n/locales/en-US/translation.json | 1 - src/lib/i18n/locales/es-ES/translation.json | 1 - src/lib/i18n/locales/eu-ES/translation.json | 1 - src/lib/i18n/locales/fa-IR/translation.json | 1 - src/lib/i18n/locales/fi-FI/translation.json | 1 - src/lib/i18n/locales/fr-CA/translation.json | 1 - src/lib/i18n/locales/fr-FR/translation.json | 1 - src/lib/i18n/locales/he-IL/translation.json | 1 - src/lib/i18n/locales/hi-IN/translation.json | 1 - src/lib/i18n/locales/hr-HR/translation.json | 1 - src/lib/i18n/locales/hu-HU/translation.json | 1 - src/lib/i18n/locales/id-ID/translation.json | 1 - src/lib/i18n/locales/ie-GA/translation.json | 1 - src/lib/i18n/locales/it-IT/translation.json | 1 - src/lib/i18n/locales/ja-JP/translation.json | 1 - src/lib/i18n/locales/ka-GE/translation.json | 1 - src/lib/i18n/locales/ko-KR/translation.json | 1 - src/lib/i18n/locales/lt-LT/translation.json | 1 - src/lib/i18n/locales/ms-MY/translation.json | 1 - src/lib/i18n/locales/nb-NO/translation.json | 1 - src/lib/i18n/locales/nl-NL/translation.json | 1 - src/lib/i18n/locales/pa-IN/translation.json | 1 - src/lib/i18n/locales/pl-PL/translation.json | 1 - src/lib/i18n/locales/pt-BR/translation.json | 1 - src/lib/i18n/locales/pt-PT/translation.json | 1 - src/lib/i18n/locales/ro-RO/translation.json | 1 - src/lib/i18n/locales/ru-RU/translation.json | 1 - src/lib/i18n/locales/sk-SK/translation.json | 1 - src/lib/i18n/locales/sr-RS/translation.json | 1 - src/lib/i18n/locales/sv-SE/translation.json | 1 - src/lib/i18n/locales/th-TH/translation.json | 1 - src/lib/i18n/locales/tk-TW/translation.json | 1 
- src/lib/i18n/locales/tr-TR/translation.json | 1 - src/lib/i18n/locales/uk-UA/translation.json | 1 - src/lib/i18n/locales/ur-PK/translation.json | 1 - src/lib/i18n/locales/vi-VN/translation.json | 1 - src/lib/i18n/locales/zh-CN/translation.json | 1 - src/lib/i18n/locales/zh-TW/translation.json | 1 - 54 files changed, 6 insertions(+), 84 deletions(-) diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 28a749e7d..1af8a70aa 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -1,5 +1,3 @@ -import validators - from typing import Optional from urllib.parse import urlparse @@ -12,8 +10,6 @@ def get_filtered_results(results, filter_list): filtered_results = [] for result in results: url = result.get("url") or result.get("link", "") - if not validators.url(url): - continue domain = urlparse(url).netloc if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): filtered_results.append(result) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 3c77402c3..ddbdc6004 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -48,16 +48,6 @@ def validate_url(url: Union[str, Sequence[str]]): else: return False -def safe_validate_urls(url: Sequence[str]) -> Sequence[str]: - valid_urls = [] - for u in url: - try: - if validate_url(u): - valid_urls.append(u) - except ValueError: - continue - return valid_urls - def resolve_hostname(hostname): # Get address information addr_info = socket.getaddrinfo(hostname, None) @@ -253,11 +243,12 @@ def get_web_loader( verify_ssl: bool = True, requests_per_second: int = 2, ): - # Check if the URLs are valid - safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) + # Check if the URL is valid + if not validate_url(urls): + raise ValueError(ERROR_MESSAGES.INVALID_URL) web_loader_args = { - "urls": safe_urls, + 
"urls": urls, "verify_ssl": verify_ssl, "requests_per_second": requests_per_second, "continue_on_failure": True @@ -270,6 +261,6 @@ def get_web_loader( WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] web_loader = WebLoaderClass(**web_loader_args) - log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls)) return web_loader \ No newline at end of file diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 507698084..65fa12ab2 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1239,10 +1239,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: @router.post("/process/web/search") async def process_web_search( - request: Request, form_data: SearchForm, extra_params: dict, user=Depends(get_verified_user) + request: Request, form_data: SearchForm, user=Depends(get_verified_user) ): - event_emitter = extra_params["__event_emitter__"] - try: logging.info( f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" @@ -1260,18 +1258,6 @@ async def process_web_search( log.debug(f"web_results: {web_results}") - await event_emitter( - { - "type": "status", - "data": { - "action": "web_search", - "description": "Loading {{count}} sites", - "urls": [result.link for result in web_results], - "done": False - }, - } - ) - try: collection_name = form_data.collection_name if collection_name == "" or collection_name is None: diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 961e57b9e..77b820cd9 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -443,7 +443,6 @@ async def chat_web_search_handler( "query": searchQuery, } ), - extra_params=extra_params, user=user ) diff --git 
a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 479180ae8..f6a4b0bc0 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -585,7 +585,6 @@ : ''} text-gray-500 dark:text-gray-500 text-base line-clamp-1 text-wrap" > - {#if status?.description.includes('{{searchQuery}}')} {$i18n.t(status?.description, { searchQuery: status?.query diff --git a/src/lib/i18n/locales/ar-BH/translation.json b/src/lib/i18n/locales/ar-BH/translation.json index 8149c9fe6..98a4e557b 100644 --- a/src/lib/i18n/locales/ar-BH/translation.json +++ b/src/lib/i18n/locales/ar-BH/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "يمكن أن تصدر بعض الأخطاء. لذلك يجب التحقق من المعلومات المهمة", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bg-BG/translation.json b/src/lib/i18n/locales/bg-BG/translation.json index e85f7bd53..e12120e77 100644 --- a/src/lib/i18n/locales/bg-BG/translation.json +++ b/src/lib/i18n/locales/bg-BG/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могат да правят грешки. Проверете важните данни.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bn-BD/translation.json b/src/lib/i18n/locales/bn-BD/translation.json index cfd4ccd0c..9aba7a61d 100644 --- a/src/lib/i18n/locales/bn-BD/translation.json +++ b/src/lib/i18n/locales/bn-BD/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM ভুল করতে পারে। গুরুত্বপূর্ণ তথ্য যাচাই করে নিন।", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ca-ES/translation.json b/src/lib/i18n/locales/ca-ES/translation.json index 6ebd4a790..6de03ddda 100644 --- a/src/lib/i18n/locales/ca-ES/translation.json +++ b/src/lib/i18n/locales/ca-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escoltant...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Els models de llenguatge poden cometre errors. Verifica la informació important.", - "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Models locals", "Lost": "Perdut", diff --git a/src/lib/i18n/locales/ceb-PH/translation.json b/src/lib/i18n/locales/ceb-PH/translation.json index 3ca5045f0..8d85343a1 100644 --- a/src/lib/i18n/locales/ceb-PH/translation.json +++ b/src/lib/i18n/locales/ceb-PH/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Ang mga LLM mahimong masayop. ", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/cs-CZ/translation.json b/src/lib/i18n/locales/cs-CZ/translation.json index 76d4b9d39..fe7c4cef3 100644 --- a/src/lib/i18n/locales/cs-CZ/translation.json +++ b/src/lib/i18n/locales/cs-CZ/translation.json @@ -565,7 +565,6 @@ "Listening...": "Poslouchání...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM mohou dělat chyby. 
Ověřte si důležité informace.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokální modely", "Lost": "Ztracený", diff --git a/src/lib/i18n/locales/da-DK/translation.json b/src/lib/i18n/locales/da-DK/translation.json index 79c959542..a358dc25b 100644 --- a/src/lib/i18n/locales/da-DK/translation.json +++ b/src/lib/i18n/locales/da-DK/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lytter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'er kan lave fejl. Bekræft vigtige oplysninger.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokale modeller", "Lost": "", diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json index f8d09d322..64ec0776d 100644 --- a/src/lib/i18n/locales/de-DE/translation.json +++ b/src/lib/i18n/locales/de-DE/translation.json @@ -565,7 +565,6 @@ "Listening...": "Höre zu...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs können Fehler machen. Überprüfe wichtige Informationen.", - "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale Modelle", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/dg-DG/translation.json b/src/lib/i18n/locales/dg-DG/translation.json index 9439b4b37..6733ae692 100644 --- a/src/lib/i18n/locales/dg-DG/translation.json +++ b/src/lib/i18n/locales/dg-DG/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs can make borks. Verify important info.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/el-GR/translation.json b/src/lib/i18n/locales/el-GR/translation.json index 7e531fc94..c44018fe0 100644 --- a/src/lib/i18n/locales/el-GR/translation.json +++ b/src/lib/i18n/locales/el-GR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Ακούγεται...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "Τα LLM μπορούν να κάνουν λάθη. Επαληθεύστε σημαντικές πληροφορίες.", - "Loading {{count}} sites": "", "Local": "Τοπικό", "Local Models": "Τοπικά Μοντέλα", "Lost": "Χαμένος", diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json index da563d920..8d8c14864 100644 --- a/src/lib/i18n/locales/en-GB/translation.json +++ b/src/lib/i18n/locales/en-GB/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index da563d920..8d8c14864 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/es-ES/translation.json b/src/lib/i18n/locales/es-ES/translation.json index 4a0162a53..1b75f4727 100644 --- a/src/lib/i18n/locales/es-ES/translation.json +++ b/src/lib/i18n/locales/es-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escuchando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Los LLM pueden cometer errores. Verifica la información importante.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos locales", "Lost": "", diff --git a/src/lib/i18n/locales/eu-ES/translation.json b/src/lib/i18n/locales/eu-ES/translation.json index 1bbb0fbe3..47c5df84e 100644 --- a/src/lib/i18n/locales/eu-ES/translation.json +++ b/src/lib/i18n/locales/eu-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Entzuten...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMek akatsak egin ditzakete. 
Egiaztatu informazio garrantzitsua.", - "Loading {{count}} sites": "", "Local": "Lokala", "Local Models": "Modelo lokalak", "Lost": "Galduta", diff --git a/src/lib/i18n/locales/fa-IR/translation.json b/src/lib/i18n/locales/fa-IR/translation.json index 80a646e4f..75609ffdf 100644 --- a/src/lib/i18n/locales/fa-IR/translation.json +++ b/src/lib/i18n/locales/fa-IR/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "مدل\u200cهای زبانی بزرگ می\u200cتوانند اشتباه کنند. اطلاعات مهم را راستی\u200cآزمایی کنید.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/fi-FI/translation.json b/src/lib/i18n/locales/fi-FI/translation.json index bd60c5789..e9f7f5653 100644 --- a/src/lib/i18n/locales/fi-FI/translation.json +++ b/src/lib/i18n/locales/fi-FI/translation.json @@ -565,7 +565,6 @@ "Listening...": "Kuuntelee...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Kielimallit voivat tehdä virheitä. Tarkista tärkeät tiedot.", - "Loading {{count}} sites": "", "Local": "Paikallinen", "Local Models": "Paikalliset mallit", "Lost": "Mennyt", diff --git a/src/lib/i18n/locales/fr-CA/translation.json b/src/lib/i18n/locales/fr-CA/translation.json index 6f507c133..a6abf8908 100644 --- a/src/lib/i18n/locales/fr-CA/translation.json +++ b/src/lib/i18n/locales/fr-CA/translation.json @@ -565,7 +565,6 @@ "Listening...": "En train d'écouter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. 
Vérifiez les informations importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modèles locaux", "Lost": "", diff --git a/src/lib/i18n/locales/fr-FR/translation.json b/src/lib/i18n/locales/fr-FR/translation.json index 491a86f88..d188ff917 100644 --- a/src/lib/i18n/locales/fr-FR/translation.json +++ b/src/lib/i18n/locales/fr-FR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Écoute en cours...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", - "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Modèles locaux", "Lost": "Perdu", diff --git a/src/lib/i18n/locales/he-IL/translation.json b/src/lib/i18n/locales/he-IL/translation.json index 06599d9d0..695fd575e 100644 --- a/src/lib/i18n/locales/he-IL/translation.json +++ b/src/lib/i18n/locales/he-IL/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "מודלים בשפה טבעית יכולים לטעות. אמת מידע חשוב.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hi-IN/translation.json b/src/lib/i18n/locales/hi-IN/translation.json index 70d4584e1..e98ca6e0e 100644 --- a/src/lib/i18n/locales/hi-IN/translation.json +++ b/src/lib/i18n/locales/hi-IN/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "एलएलएम गलतियाँ कर सकते हैं। महत्वपूर्ण जानकारी सत्यापित करें.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hr-HR/translation.json b/src/lib/i18n/locales/hr-HR/translation.json index b4a27a781..61c6f7601 100644 --- a/src/lib/i18n/locales/hr-HR/translation.json +++ b/src/lib/i18n/locales/hr-HR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Slušam...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM-ovi mogu pogriješiti. Provjerite važne informacije.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalni modeli", "Lost": "", diff --git a/src/lib/i18n/locales/hu-HU/translation.json b/src/lib/i18n/locales/hu-HU/translation.json index 764cf2db5..14818eeb1 100644 --- a/src/lib/i18n/locales/hu-HU/translation.json +++ b/src/lib/i18n/locales/hu-HU/translation.json @@ -565,7 +565,6 @@ "Listening...": "Hallgatás...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Az LLM-ek hibázhatnak. Ellenőrizze a fontos információkat.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Helyi modellek", "Lost": "Elveszett", diff --git a/src/lib/i18n/locales/id-ID/translation.json b/src/lib/i18n/locales/id-ID/translation.json index e47acd089..6a61ea717 100644 --- a/src/lib/i18n/locales/id-ID/translation.json +++ b/src/lib/i18n/locales/id-ID/translation.json @@ -565,7 +565,6 @@ "Listening...": "Mendengarkan", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM dapat membuat kesalahan. Verifikasi informasi penting.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Lokal", "Lost": "", diff --git a/src/lib/i18n/locales/ie-GA/translation.json b/src/lib/i18n/locales/ie-GA/translation.json index 1e114503b..4f320ba7f 100644 --- a/src/lib/i18n/locales/ie-GA/translation.json +++ b/src/lib/i18n/locales/ie-GA/translation.json @@ -565,7 +565,6 @@ "Listening...": "Éisteacht...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Is féidir le LLManna botúin a dhéanamh. 
Fíoraigh faisnéis thábhachtach.", - "Loading {{count}} sites": "", "Local": "Áitiúil", "Local Models": "Múnlaí Áitiúla", "Lost": "Cailleadh", diff --git a/src/lib/i18n/locales/it-IT/translation.json b/src/lib/i18n/locales/it-IT/translation.json index 08954b502..2770d9c9c 100644 --- a/src/lib/i18n/locales/it-IT/translation.json +++ b/src/lib/i18n/locales/it-IT/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Gli LLM possono commettere errori. Verifica le informazioni importanti.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ja-JP/translation.json b/src/lib/i18n/locales/ja-JP/translation.json index e529a6092..baa80bc2b 100644 --- a/src/lib/i18n/locales/ja-JP/translation.json +++ b/src/lib/i18n/locales/ja-JP/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM は間違いを犯す可能性があります。重要な情報を検証してください。", - "Loading {{count}} sites": "", "Local": "", "Local Models": "ローカルモデル", "Lost": "", diff --git a/src/lib/i18n/locales/ka-GE/translation.json b/src/lib/i18n/locales/ka-GE/translation.json index ffb1eb9fc..edac4d43f 100644 --- a/src/lib/i18n/locales/ka-GE/translation.json +++ b/src/lib/i18n/locales/ka-GE/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "შესაძლოა LLM-ებმა შეცდომები დაუშვან. გადაამოწმეთ მნიშვნელოვანი ინფორმაცია.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ko-KR/translation.json b/src/lib/i18n/locales/ko-KR/translation.json index 56703424f..43de6554d 100644 --- a/src/lib/i18n/locales/ko-KR/translation.json +++ b/src/lib/i18n/locales/ko-KR/translation.json @@ -565,7 +565,6 @@ "Listening...": "듣는 중...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLM에 오류가 있을 수 있습니다. 중요한 정보는 확인이 필요합니다.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "로컬 모델", "Lost": "패배", diff --git a/src/lib/i18n/locales/lt-LT/translation.json b/src/lib/i18n/locales/lt-LT/translation.json index 09f8ed02b..5a644032e 100644 --- a/src/lib/i18n/locales/lt-LT/translation.json +++ b/src/lib/i18n/locales/lt-LT/translation.json @@ -565,7 +565,6 @@ "Listening...": "Klausoma...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Dideli kalbos modeliai gali klysti. Patikrinkite atsakymų teisingumą.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalūs modeliai", "Lost": "", diff --git a/src/lib/i18n/locales/ms-MY/translation.json b/src/lib/i18n/locales/ms-MY/translation.json index 31f29d4b6..236d0bbc5 100644 --- a/src/lib/i18n/locales/ms-MY/translation.json +++ b/src/lib/i18n/locales/ms-MY/translation.json @@ -565,7 +565,6 @@ "Listening...": "Mendengar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM boleh membuat kesilapan. Sahkan maklumat penting", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Tempatan", "Lost": "", diff --git a/src/lib/i18n/locales/nb-NO/translation.json b/src/lib/i18n/locales/nb-NO/translation.json index 352044fee..5121d297e 100644 --- a/src/lib/i18n/locales/nb-NO/translation.json +++ b/src/lib/i18n/locales/nb-NO/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lytter ...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Språkmodeller kan gjøre feil. 
Kontroller viktige opplysninger.", - "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale modeller", "Lost": "Tapt", diff --git a/src/lib/i18n/locales/nl-NL/translation.json b/src/lib/i18n/locales/nl-NL/translation.json index e05391048..78dfd0a54 100644 --- a/src/lib/i18n/locales/nl-NL/translation.json +++ b/src/lib/i18n/locales/nl-NL/translation.json @@ -565,7 +565,6 @@ "Listening...": "Aan het luisteren...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs kunnen fouten maken. Verifieer belangrijke informatie.", - "Loading {{count}} sites": "", "Local": "Lokaal", "Local Models": "Lokale modellen", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/pa-IN/translation.json b/src/lib/i18n/locales/pa-IN/translation.json index 408be6142..cae4f25c6 100644 --- a/src/lib/i18n/locales/pa-IN/translation.json +++ b/src/lib/i18n/locales/pa-IN/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs ਗਲਤੀਆਂ ਕਰ ਸਕਦੇ ਹਨ। ਮਹੱਤਵਪੂਰਨ ਜਾਣਕਾਰੀ ਦੀ ਪੁਸ਼ਟੀ ਕਰੋ।", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pl-PL/translation.json b/src/lib/i18n/locales/pl-PL/translation.json index 2f38deaf7..caebf0272 100644 --- a/src/lib/i18n/locales/pl-PL/translation.json +++ b/src/lib/i18n/locales/pl-PL/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMy mogą popełniać błędy. Zweryfikuj ważne informacje.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pt-BR/translation.json b/src/lib/i18n/locales/pt-BR/translation.json index fa1187e85..b89592035 100644 --- a/src/lib/i18n/locales/pt-BR/translation.json +++ b/src/lib/i18n/locales/pt-BR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escutando...", "Llama.cpp": "", "LLMs can make mistakes. 
Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "Perdeu", diff --git a/src/lib/i18n/locales/pt-PT/translation.json b/src/lib/i18n/locales/pt-PT/translation.json index 0863ed7da..18e2b137a 100644 --- a/src/lib/i18n/locales/pt-PT/translation.json +++ b/src/lib/i18n/locales/pt-PT/translation.json @@ -565,7 +565,6 @@ "Listening...": "A escutar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "", diff --git a/src/lib/i18n/locales/ro-RO/translation.json b/src/lib/i18n/locales/ro-RO/translation.json index 1a3905d4f..2d6c02277 100644 --- a/src/lib/i18n/locales/ro-RO/translation.json +++ b/src/lib/i18n/locales/ro-RO/translation.json @@ -565,7 +565,6 @@ "Listening...": "Ascult...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-urile pot face greșeli. Verificați informațiile importante.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modele Locale", "Lost": "Pierdut", diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json index ca6d12bae..f0fc3c8e1 100644 --- a/src/lib/i18n/locales/ru-RU/translation.json +++ b/src/lib/i18n/locales/ru-RU/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слушаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могут допускать ошибки. 
Проверяйте важную информацию.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Локальные модели", "Lost": "", diff --git a/src/lib/i18n/locales/sk-SK/translation.json b/src/lib/i18n/locales/sk-SK/translation.json index d4bf1ea35..8e4742fbb 100644 --- a/src/lib/i18n/locales/sk-SK/translation.json +++ b/src/lib/i18n/locales/sk-SK/translation.json @@ -565,7 +565,6 @@ "Listening...": "Počúvanie...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM môžu robiť chyby. Overte si dôležité informácie.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokálne modely", "Lost": "Stratený", diff --git a/src/lib/i18n/locales/sr-RS/translation.json b/src/lib/i18n/locales/sr-RS/translation.json index c6bdaf359..a16b78328 100644 --- a/src/lib/i18n/locales/sr-RS/translation.json +++ b/src/lib/i18n/locales/sr-RS/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слушам...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "ВЈМ-ови (LLM-ови) могу правити грешке. Проверите важне податке.", - "Loading {{count}} sites": "", "Local": "Локално", "Local Models": "Локални модели", "Lost": "Пораза", diff --git a/src/lib/i18n/locales/sv-SE/translation.json b/src/lib/i18n/locales/sv-SE/translation.json index c8ea56901..a2d103b23 100644 --- a/src/lib/i18n/locales/sv-SE/translation.json +++ b/src/lib/i18n/locales/sv-SE/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lyssnar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM:er kan göra misstag. 
Granska viktig information.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokala modeller", "Lost": "", diff --git a/src/lib/i18n/locales/th-TH/translation.json b/src/lib/i18n/locales/th-TH/translation.json index 8f41d77d6..8b2e59199 100644 --- a/src/lib/i18n/locales/th-TH/translation.json +++ b/src/lib/i18n/locales/th-TH/translation.json @@ -565,7 +565,6 @@ "Listening...": "กำลังฟัง...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs สามารถทำผิดพลาดได้ ตรวจสอบข้อมูลสำคัญ", - "Loading {{count}} sites": "", "Local": "", "Local Models": "โมเดลท้องถิ่น", "Lost": "", diff --git a/src/lib/i18n/locales/tk-TW/translation.json b/src/lib/i18n/locales/tk-TW/translation.json index da563d920..8d8c14864 100644 --- a/src/lib/i18n/locales/tk-TW/translation.json +++ b/src/lib/i18n/locales/tk-TW/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/tr-TR/translation.json b/src/lib/i18n/locales/tr-TR/translation.json index 6eb6bcb7c..8195df8d0 100644 --- a/src/lib/i18n/locales/tr-TR/translation.json +++ b/src/lib/i18n/locales/tr-TR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Dinleniyor...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'ler hata yapabilir. Önemli bilgileri doğrulayın.", - "Loading {{count}} sites": "", "Local": "Yerel", "Local Models": "Yerel Modeller", "Lost": "Kayıp", diff --git a/src/lib/i18n/locales/uk-UA/translation.json b/src/lib/i18n/locales/uk-UA/translation.json index 1bd9a15bd..2aa92ae15 100644 --- a/src/lib/i18n/locales/uk-UA/translation.json +++ b/src/lib/i18n/locales/uk-UA/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слухаю...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "LLMs можуть помилятися. 
Перевірте важливу інформацію.", - "Loading {{count}} sites": "", "Local": "Локальний", "Local Models": "Локальні моделі", "Lost": "Втрачене", diff --git a/src/lib/i18n/locales/ur-PK/translation.json b/src/lib/i18n/locales/ur-PK/translation.json index 806598987..f733eda3d 100644 --- a/src/lib/i18n/locales/ur-PK/translation.json +++ b/src/lib/i18n/locales/ur-PK/translation.json @@ -565,7 +565,6 @@ "Listening...": "سن رہے ہیں...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "ایل ایل ایم غلطیاں کر سکتے ہیں اہم معلومات کی تصدیق کریں", - "Loading {{count}} sites": "", "Local": "", "Local Models": "مقامی ماڈلز", "Lost": "گم شدہ", diff --git a/src/lib/i18n/locales/vi-VN/translation.json b/src/lib/i18n/locales/vi-VN/translation.json index 9a6e89610..93e98680b 100644 --- a/src/lib/i18n/locales/vi-VN/translation.json +++ b/src/lib/i18n/locales/vi-VN/translation.json @@ -565,7 +565,6 @@ "Listening...": "Đang nghe...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Hệ thống có thể tạo ra nội dung không chính xác hoặc sai. Hãy kiểm chứng kỹ lưỡng thông tin trước khi tiếp nhận và sử dụng.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index b8fbd3617..f32067afb 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -565,7 +565,6 @@ "Listening...": "正在倾听...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. 
Verify important information.": "大语言模型可能会生成误导性错误信息,请对关键信息加以验证。", - "Loading {{count}} sites": "", "Local": "本地", "Local Models": "本地模型", "Lost": "落败", diff --git a/src/lib/i18n/locales/zh-TW/translation.json b/src/lib/i18n/locales/zh-TW/translation.json index c838cad4e..78dcddaff 100644 --- a/src/lib/i18n/locales/zh-TW/translation.json +++ b/src/lib/i18n/locales/zh-TW/translation.json @@ -565,7 +565,6 @@ "Listening...": "正在聆聽...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大型語言模型可能會出錯。請驗證重要資訊。", - "Loading {{count}} sites": "", "Local": "本機", "Local Models": "本機模型", "Lost": "已遺失", From aa2b764d743f348f6d380aa43fd2184e49ab6745 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:32:45 -0600 Subject: [PATCH 08/96] Finalize incomplete merge to update playwright branch Introduced feature parity for trust_env --- backend/open_webui/retrieval/web/utils.py | 59 ++++++++++++++--------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 25ca5aef5..b9fbbee3a 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -1,30 +1,33 @@ import asyncio -from datetime import datetime, time, timedelta +import logging import socket import ssl -import aiohttp -import asyncio import urllib.parse +import urllib.request +from collections import defaultdict +from datetime import datetime, time, timedelta +from typing import ( + Any, + AsyncIterator, + Dict, + Iterator, + List, + Optional, + Sequence, + Union +) +import aiohttp import certifi import validators -from collections import defaultdict -from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator -from typing import Any, AsyncIterator, Dict, Iterator, List, Sequence, Union - - from langchain_community.document_loaders import ( - WebBaseLoader, - PlaywrightURLLoader + 
PlaywrightURLLoader, + WebBaseLoader ) from langchain_core.documents import Document - - from open_webui.constants import ERROR_MESSAGES from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS -import logging - log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -91,18 +94,20 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection. Attributes: - urls (List[str]): List of URLs to load. + web_paths (List[str]): List of URLs to load. verify_ssl (bool): If True, verify SSL certificates. requests_per_second (Optional[float]): Number of requests per second to limit to. continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. + trust_env (bool): If True, use proxy settings from environment variables. 
""" def __init__( self, - urls: List[str], + web_paths: List[str], verify_ssl: bool = True, + trust_env: bool = False, requests_per_second: Optional[float] = None, continue_on_failure: bool = True, headless: bool = True, @@ -111,9 +116,20 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): playwright_ws_url: Optional[str] = None ): """Initialize with additional safety parameters and remote browser support.""" + + proxy_server = proxy.get('server') if proxy else None + if trust_env and not proxy_server: + env_proxies = urllib.request.getproxies() + env_proxy_server = env_proxies.get('https') or env_proxies.get('http') + if env_proxy_server: + if proxy: + proxy['server'] = env_proxy_server + else: + proxy = { 'server': env_proxy_server } + # We'll set headless to False if using playwright_ws_url since it's handled by the remote browser super().__init__( - urls=urls, + urls=web_paths, continue_on_failure=continue_on_failure, headless=headless if playwright_ws_url is None else False, remove_selectors=remove_selectors, @@ -123,6 +139,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): self.requests_per_second = requests_per_second self.last_request_time = None self.playwright_ws_url = playwright_ws_url + self.trust_env = trust_env def lazy_load(self) -> Iterator[Document]: """Safely load URLs synchronously with support for remote browser.""" @@ -347,14 +364,12 @@ def get_web_loader( # Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - web_loader_args = { - web_path=safe_urls, - "urls": safe_urls, + "web_paths": safe_urls, "verify_ssl": verify_ssl, "requests_per_second": requests_per_second, "continue_on_failure": True, - trust_env=trust_env + "trust_env": trust_env } if PLAYWRIGHT_WS_URI.value: @@ -364,6 +379,6 @@ def get_web_loader( WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] web_loader = WebLoaderClass(**web_loader_args) - log.debug("Using RAG_WEB_LOADER %s for %s URLs", 
web_loader.__class__.__name__, len(urls)) + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) return web_loader \ No newline at end of file From b1bab2ece88618a34deb228b72c44b8981d7db8c Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:43:46 -0600 Subject: [PATCH 09/96] Remove duplicate loader.alazy_load line from merge --- backend/open_webui/routers/retrieval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index e79e414b1..84a49e4ca 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1348,8 +1348,6 @@ async def process_web_search( requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, ) - docs = [doc async for doc in loader.alazy_load()] - # docs = loader.load() docs = await loader.aload() await run_in_threadpool( save_docs_to_vector_db, From 8e9b00a017ddd5995fe2d5f2dd4dd7dcf3955efb Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:48:15 -0600 Subject: [PATCH 10/96] Fix docstring --- backend/open_webui/retrieval/web/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index b9fbbee3a..0718b6b85 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -96,11 +96,12 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): Attributes: web_paths (List[str]): List of URLs to load. verify_ssl (bool): If True, verify SSL certificates. + trust_env (bool): If True, use proxy settings from environment variables. requests_per_second (Optional[float]): Number of requests per second to limit to. 
continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. + proxy (dict): Proxy override settings for the Playwright session. playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. - trust_env (bool): If True, use proxy settings from environment variables. """ def __init__( From bc82f48ebfa7dd1ea334efd31b9c73de2cc456c0 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Mon, 17 Feb 2025 21:43:32 -0600 Subject: [PATCH 11/96] refac: RAG_WEB_LOADER -> RAG_WEB_LOADER_ENGINE --- backend/open_webui/config.py | 10 +++++----- backend/open_webui/main.py | 4 ++-- backend/open_webui/retrieval/web/utils.py | 12 ++++++------ backend/start.sh | 2 +- backend/start_windows.bat | 2 +- docker-compose.playwright.yaml | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index c926759ca..0c796eb49 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1853,10 +1853,10 @@ RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), ) -RAG_WEB_LOADER = PersistentConfig( - "RAG_WEB_LOADER", - "rag.web.loader", - os.environ.get("RAG_WEB_LOADER", "safe_web") +RAG_WEB_LOADER_ENGINE = PersistentConfig( + "RAG_WEB_LOADER_ENGINE", + "rag.web.loader.engine", + os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web") ) RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( @@ -1867,7 +1867,7 @@ RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( PLAYWRIGHT_WS_URI = PersistentConfig( "PLAYWRIGHT_WS_URI", - "rag.web.loader.playwright.ws.uri", + "rag.web.loader.engine.playwright.ws.uri", os.environ.get("PLAYWRIGHT_WS_URI", None) ) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 5035119a0..0f6f91608 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -140,7 +140,7 
@@ from open_webui.config import ( AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, PLAYWRIGHT_WS_URI, - RAG_WEB_LOADER, + RAG_WEB_LOADER_ENGINE, WHISPER_MODEL, DEEPGRAM_API_KEY, WHISPER_MODEL_AUTO_UPDATE, @@ -561,7 +561,7 @@ app.state.config.EXA_API_KEY = EXA_API_KEY app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS -app.state.config.RAG_WEB_LOADER = RAG_WEB_LOADER +app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 0718b6b85..dcf1728e9 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -25,7 +25,7 @@ from langchain_community.document_loaders import ( ) from langchain_core.documents import Document from open_webui.constants import ERROR_MESSAGES -from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER_ENGINE from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) @@ -352,9 +352,9 @@ class SafeWebBaseLoader(WebBaseLoader): """Load data into Document objects.""" return [document async for document in self.alazy_load()] -RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) -RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader -RAG_WEB_LOADERS["safe_web"] = SafeWebBaseLoader +RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader) +RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader +RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader def get_web_loader( urls: Union[str, Sequence[str]], @@ -377,9 +377,9 @@ def get_web_loader( web_loader_args["playwright_ws_url"] = 
PLAYWRIGHT_WS_URI.value # Create the appropriate WebLoader based on the configuration - WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] + WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value] web_loader = WebLoaderClass(**web_loader_args) - log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) + log.debug("Using RAG_WEB_LOADER_ENGINE %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) return web_loader \ No newline at end of file diff --git a/backend/start.sh b/backend/start.sh index 3b08cf549..671c22ff7 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,7 +4,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation -if [[ "${RAG_WEB_LOADER,,}" == "playwright" ]]; then +if [[ "${RAG_WEB_LOADER_ENGINE,,}" == "playwright" ]]; then if [[ -z "${PLAYWRIGHT_WS_URI}" ]]; then echo "Installing Playwright browsers..." playwright install chromium diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 036e1f721..7049cd1b3 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -7,7 +7,7 @@ SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation -IF /I "%RAG_WEB_LOADER%" == "playwright" ( +IF /I "%RAG_WEB_LOADER_ENGINE%" == "playwright" ( IF "%PLAYWRIGHT_WS_URI%" == "" ( echo Installing Playwright browsers... 
playwright install chromium diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml index 0a4bb3f76..fe570bed0 100644 --- a/docker-compose.playwright.yaml +++ b/docker-compose.playwright.yaml @@ -6,5 +6,5 @@ services: open-webui: environment: - - 'RAG_WEB_LOADER=playwright' + - 'RAG_WEB_LOADER_ENGINE=playwright' - 'PLAYWRIGHT_WS_URI=ws://playwright:3000' \ No newline at end of file From 9d06e3b2cd1da7eb66740039011cd4df3a25ca0b Mon Sep 17 00:00:00 2001 From: hurxxxx Date: Tue, 18 Feb 2025 21:56:16 +0900 Subject: [PATCH 12/96] feat: chat responses using the citations-disabled model to exclude source citations --- .../chat/Messages/ContentRenderer.svelte | 5 +++++ .../chat/Messages/Markdown/Source.svelte | 22 ++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/lib/components/chat/Messages/ContentRenderer.svelte b/src/lib/components/chat/Messages/ContentRenderer.svelte index 23c8ff803..21203bed8 100644 --- a/src/lib/components/chat/Messages/ContentRenderer.svelte +++ b/src/lib/components/chat/Messages/ContentRenderer.svelte @@ -120,6 +120,11 @@ sourceIds={(sources ?? []).reduce((acc, s) => { let ids = []; s.document.forEach((document, index) => { + if (model?.info?.meta?.capabilities?.citations == false) { + ids.push('N/A'); + return ids; + } + const metadata = s.metadata?.[index]; const id = metadata?.source ?? 
'N/A'; diff --git a/src/lib/components/chat/Messages/Markdown/Source.svelte b/src/lib/components/chat/Messages/Markdown/Source.svelte index 4eb1fffb7..338ca534a 100644 --- a/src/lib/components/chat/Messages/Markdown/Source.svelte +++ b/src/lib/components/chat/Messages/Markdown/Source.svelte @@ -35,13 +35,15 @@ $: attributes = extractAttributes(token.text); - +{#if attributes.title !== 'N/A'} + +{/if} From 4939d6871b033894098f5aa4566e3339b6125357 Mon Sep 17 00:00:00 2001 From: KarlLee830 <61072264+KarlLee830@users.noreply.github.com> Date: Tue, 18 Feb 2025 23:40:25 +0800 Subject: [PATCH 13/96] i18n: Update Chinese Translation --- src/lib/i18n/locales/zh-CN/translation.json | 60 ++++++++++----------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index 4e26eb022..3b7dfb05d 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -20,7 +20,7 @@ "Account Activation Pending": "账号待激活", "Accurate information": "提供的信息很准确", "Actions": "自动化", - "Activate": "", + "Activate": "激活", "Activate this command by typing \"/{{COMMAND}}\" to chat input.": "通过输入 \"/{{COMMAND}}\" 激活此命令", "Active Users": "当前在线用户", "Add": "添加", @@ -100,7 +100,7 @@ "Audio": "语音", "August": "八月", "Authenticate": "认证", - "Authentication": "", + "Authentication": "身份验证", "Auto-Copy Response to Clipboard": "自动复制回复到剪贴板", "Auto-playback response": "自动念出回复内容", "Autocomplete Generation": "输入框内容猜测补全", @@ -167,7 +167,7 @@ "Click here to": "点击", "Click here to download user import template file.": "点击此处下载用户导入所需的模板文件。", "Click here to learn more about faster-whisper and see the available models.": "点击此处了解更多关于faster-whisper的信息,并查看可用的模型。", - "Click here to see available models.": "单击此处查看可用型号。", + "Click here to see available models.": "单击此处查看可用模型。", "Click here to select": "点击这里选择", "Click here to select a csv file.": "点击此处选择 csv 文件。", "Click here to select a py 
file.": "点击此处选择 py 文件。", @@ -180,12 +180,12 @@ "Clone of {{TITLE}}": "{{TITLE}} 的副本", "Close": "关闭", "Code execution": "代码执行", - "Code Execution": "", - "Code Execution Engine": "", + "Code Execution": "代码执行", + "Code Execution Engine": "代码执行引擎", "Code formatted successfully": "代码格式化成功", "Code Interpreter": "代码解释器", "Code Interpreter Engine": "代码解释引擎", - "Code Interpreter Prompt Template": "代码解释器提示模板", + "Code Interpreter Prompt Template": "代码解释器提示词模板", "Collection": "文件集", "Color": "颜色", "ComfyUI": "ComfyUI", @@ -202,7 +202,7 @@ "Confirm Password": "确认密码", "Confirm your action": "确定吗?", "Confirm your new password": "确认新密码", - "Connect to your own OpenAI compatible API endpoints.": "连接到您自己的 OpenAI 兼容 API 端点。", + "Connect to your own OpenAI compatible API endpoints.": "连接到你自己的与 OpenAI 兼容的 API 接口端点。", "Connections": "外部连接", "Constrains effort on reasoning for reasoning models. Only applicable to reasoning models from specific providers that support reasoning effort. (Default: medium)": "限制推理模型的推理努力。仅适用于支持推理努力的特定提供商的推理模型。(默认值:中等)", "Contact Admin for WebUI Access": "请联系管理员以获取访问权限", @@ -214,7 +214,7 @@ "Continue with Email": "使用邮箱登录", "Continue with LDAP": "使用 LDAP 登录", "Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string.": "控制消息文本如何拆分以用于 TTS 请求。“Punctuation”拆分为句子,“paragraphs”拆分为段落,“none”将消息保留为单个字符串。", - "Control the repetition of token sequences in the generated text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 1.1) will be more lenient. At 1, it is disabled. (Default: 1.1)": "", + "Control the repetition of token sequences in the generated text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 1.1) will be more lenient. At 1, it is disabled. 
(Default: 1.1)": "控制生成文本中 Token 的重复。较高的值(例如 1.5)会更强烈地惩罚重复,而较低的值(例如 1.1)则更宽松。设置为 1 时,此功能被禁用。(默认值:1.1)", "Controls": "对话高级设置", "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)": "控制输出的连贯性和多样性之间的平衡。较低的值将导致更集中和连贯的文本。(默认值:5.0)", "Copied": "已复制", @@ -270,7 +270,7 @@ "Delete folder?": "删除分组?", "Delete function?": "删除函数?", "Delete Message": "删除消息", - "Delete message?": "", + "Delete message?": "删除消息?", "Delete prompt?": "删除提示词?", "delete this link": "此处删除这个链接", "Delete tool?": "删除工具?", @@ -282,14 +282,14 @@ "Description": "描述", "Didn't fully follow instructions": "没有完全遵照指示", "Direct Connections": "直接连接", - "Direct Connections allow users to connect to their own OpenAI compatible API endpoints.": "直接连接允许用户连接到他们自己的与 OpenAI 兼容的 API 端点。", + "Direct Connections allow users to connect to their own OpenAI compatible API endpoints.": "直接连接Direct Connections 功能 允许用户连接至其自有的、兼容 OpenAI 的 API 端点。", "Direct Connections settings updated": "直接连接设置已更新", "Disabled": "禁用", "Discover a function": "发现更多函数", "Discover a model": "发现更多模型", "Discover a prompt": "发现更多提示词", "Discover a tool": "发现更多工具", - "Discover how to use Open WebUI and seek support from the community.": "", + "Discover how to use Open WebUI and seek support from the community.": "了解如何使用 Open WebUI 并寻求社区支持。", "Discover wonders": "发现奇迹", "Discover, download, and explore custom functions": "发现、下载并探索更多函数", "Discover, download, and explore custom prompts": "发现、下载并探索更多自定义提示词", @@ -314,7 +314,7 @@ "Don't like the style": "不喜欢这个文风", "Done": "完成", "Download": "下载", - "Download as SVG": "", + "Download as SVG": "下载为 SVG", "Download canceled": "下载已取消", "Download Database": "下载数据库", "Drag and drop a file to upload or select a file to view": "拖动文件上传或选择文件查看", @@ -370,7 +370,7 @@ "Enter Chunk Overlap": "输入块重叠 (Chunk Overlap)", "Enter Chunk Size": "输入块大小 (Chunk Size)", "Enter description": "输入简介描述", - "Enter domains separated by commas (e.g., 
example.com,site.org)": "输入以逗号分隔的域名(例如:example.com,site.org)", + "Enter domains separated by commas (e.g., example.com,site.org)": "输入以逗号分隔的域名(例如:example.com、site.org)", "Enter Exa API Key": "输入 Exa API 密钥", "Enter Github Raw URL": "输入 Github Raw 地址", "Enter Google PSE API Key": "输入 Google PSE API 密钥", @@ -455,7 +455,7 @@ "Failed to save models configuration": "无法保存模型配置", "Failed to update settings": "无法更新设置", "Failed to upload file.": "上传文件失败", - "Features": "", + "Features": "功能", "Features Permissions": "功能权限", "February": "二月", "Feedback History": "反馈历史", @@ -485,7 +485,7 @@ "Form": "手动创建", "Format your variables using brackets like this:": "使用括号格式化你的变量,如下所示:", "Frequency Penalty": "频率惩罚", - "Full Context Mode": "", + "Full Context Mode": "完整上下文模式", "Function": "函数", "Function Calling": "函数调用 (Function Calling)", "Function created successfully": "函数创建成功", @@ -601,7 +601,7 @@ "Leave empty to include all models or select specific models": "留空表示包含所有模型或请选择模型", "Leave empty to use the default prompt, or enter a custom prompt": "留空以使用默认提示词,或输入自定义提示词。", "Leave model field empty to use the default model.": "将模型字段留空以使用默认模型。", - "License": "", + "License": "授权", "Light": "浅色", "Listening...": "正在倾听...", "Llama.cpp": "Llama.cpp", @@ -761,7 +761,7 @@ "Playground": "AI 对话游乐场", "Please carefully review the following warnings:": "请仔细阅读以下警告信息:", "Please do not close the settings page while loading the model.": "加载模型时请不要关闭设置页面。", - "Please enter a prompt": "请输出一个 Prompt", + "Please enter a prompt": "请输入一个 Prompt", "Please fill in all fields.": "请填写所有字段。", "Please select a model first.": "请先选择一个模型。", "Please select a model.": "请选择一个模型。", @@ -770,7 +770,7 @@ "Positive attitude": "积极的态度", "Prefix ID": "Prefix ID", "Prefix ID is used to avoid conflicts with other connections by adding a prefix to the model IDs - leave empty to disable": "Prefix ID 用于通过为模型 ID 添加前缀来避免与其他连接发生冲突 - 留空则禁用此功能", - "Presence Penalty": "", + "Presence Penalty": "重复惩罚(Presence Penalty)", "Previous 30 days": 
"过去 30 天", "Previous 7 days": "过去 7 天", "Profile Image": "用户头像", @@ -807,7 +807,7 @@ "Rename": "重命名", "Reorder Models": "重新排序模型", "Repeat Last N": "重复最后 N 次", - "Repeat Penalty (Ollama)": "", + "Repeat Penalty (Ollama)": "重复惩罚(Ollama)", "Reply in Thread": "在主题中回复", "Request Mode": "请求模式", "Reranking Model": "重排模型", @@ -870,7 +870,7 @@ "Select a pipeline": "选择一个管道", "Select a pipeline url": "选择一个管道 URL", "Select a tool": "选择一个工具", - "Select an auth method": "选择身份验证方法", + "Select an auth method": "选择身份验证方式", "Select an Ollama instance": "选择一个 Ollama 实例。", "Select Engine": "选择引擎", "Select Knowledge": "选择知识", @@ -904,10 +904,10 @@ "Set the number of worker threads used for computation. This option controls how many threads are used to process incoming requests concurrently. Increasing this value can improve performance under high concurrency workloads but may also consume more CPU resources.": "设置用于计算的工作线程数量。该选项可控制并发处理传入请求的线程数量。增加该值可以提高高并发工作负载下的性能,但也可能消耗更多的 CPU 资源。", "Set Voice": "设置音色", "Set whisper model": "设置 whisper 模型", - "Sets a flat bias against tokens that have appeared at least once. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. At 0, it is disabled. (Default: 0)": "", - "Sets a scaling bias against tokens to penalize repetitions, based on how many times they have appeared. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. At 0, it is disabled. (Default: 1.1)": "", + "Sets a flat bias against tokens that have appeared at least once. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. At 0, it is disabled. 
(Default: 0)": "这个设置项用于调整对重复 tokens 的抑制强度。当某个 token 至少出现过一次后,系统会通过 flat bias 参数施加惩罚力度:数值越大(如 1.5),抑制重复的效果越强烈;数值较小(如 0.9)则相对宽容。当设为 0 时,系统会完全关闭这个重复抑制功能(默认值为 0)。", + "Sets a scaling bias against tokens to penalize repetitions, based on how many times they have appeared. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. At 0, it is disabled. (Default: 1.1)": "这个参数用于通过 scaling bias 机制抑制重复内容:当某些 tokens 重复出现时,系统会根据它们已出现的次数自动施加惩罚。数值越大(如 1.5)惩罚力度越强,能更有效减少重复;数值较小(如 0.9)则允许更多重复。当设为 0 时完全关闭该功能,默认值设置为 1.1 保持适度抑制。", "Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)": "设置模型回溯多远以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)", - "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: random)": "设置生成文本时使用的随机数种子。将其设置为一个特定的数字将使模型在同一提示下生成相同的文本。 默认值:随机", + "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: random)": "设置 random number seed 可以控制模型生成文本的随机起点。如果指定一个具体数字,当输入相同的提示语时,模型每次都会生成完全相同的文本内容(默认是随机选取 seed)。", "Sets the size of the context window used to generate the next token. (Default: 2048)": "设置用于生成下一个 Token 的上下文大小。(默认值:2048)", "Sets the stop sequences to use. When this pattern is encountered, the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile.": "设置要使用的停止序列。遇到这种模式时,大语言模型将停止生成文本并返回。可以通过在模型文件中指定多个单独的停止参数来设置多个停止模式。", "Settings": "设置", @@ -952,7 +952,7 @@ "Tags Generation Prompt": "标签生成提示词", "Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. 
(default: 1)": "Tail free sampling 用于减少输出中可能性较低的标记的影响。数值越大(如 2.0),影响就越小,而数值为 1.0 则会禁用此设置。(默认值:1)", "Tap to interrupt": "点击以中断", - "Tasks": "", + "Tasks": "任务", "Tavily API Key": "Tavily API 密钥", "Tell us more:": "请告诉我们更多细节", "Temperature": "温度 (Temperature)", @@ -975,7 +975,7 @@ "The score should be a value between 0.0 (0%) and 1.0 (100%).": "分值应介于 0.0(0%)和 1.0(100%)之间。", "The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)": "模型的温度。提高温度将使模型更具创造性地回答。(默认值:0.8)", "Theme": "主题", - "Thinking...": "正在思考...", + "Thinking...": "正在深度思考...", "This action cannot be undone. Do you wish to continue?": "此操作无法撤销。是否确认继续?", "This ensures that your valuable conversations are securely saved to your backend database. Thank you!": "这将确保您的宝贵对话被安全地保存到后台数据库中。感谢!", "This is an experimental feature, it may not function as expected and is subject to change at any time.": "这是一个实验功能,可能不会如预期那样工作,而且可能随时发生变化。", @@ -989,8 +989,8 @@ "This will delete all models including custom models and cannot be undone.": "这将删除所有模型,包括自定义模型,且无法撤销。", "This will reset the knowledge base and sync all files. 
Do you wish to continue?": "这将重置知识库并替换所有文件为目录下文件。确认继续?", "Thorough explanation": "解释较为详细", - "Thought for {{DURATION}}": "已推理 持续 {{DURATION}}", - "Thought for {{DURATION}} seconds": "已推理 持续 {{DURATION}} 秒", + "Thought for {{DURATION}}": "已深度思考 用时 {{DURATION}}", + "Thought for {{DURATION}} seconds": "已深度思考 用时 {{DURATION}} 秒", "Tika": "Tika", "Tika Server URL required.": "请输入 Tika 服务器地址。", "Tiktoken": "Tiktoken", @@ -1011,7 +1011,7 @@ "To select actions here, add them to the \"Functions\" workspace first.": "要在这里选择自动化,请先将其添加到工作空间中的“函数”。", "To select filters here, add them to the \"Functions\" workspace first.": "要在这里选择过滤器,请先将其添加到工作空间中的“函数”。", "To select toolkits here, add them to the \"Tools\" workspace first.": "要在这里选择工具包,请先将其添加到工作空间中的“工具”。", - "Toast notifications for new updates": "新更新的弹窗提示", + "Toast notifications for new updates": "更新后弹窗提示更新内容", "Today": "今天", "Toggle settings": "切换设置", "Toggle sidebar": "切换侧边栏", @@ -1056,7 +1056,7 @@ "Updated": "已更新", "Updated at": "更新于", "Updated At": "更新于", - "Upgrade to a licensed plan for enhanced capabilities, including custom theming and branding, and dedicated support.": "", + "Upgrade to a licensed plan for enhanced capabilities, including custom theming and branding, and dedicated support.": "升级到授权计划以获得增强功能,包括自定义主题与品牌以及专属支持。", "Upload": "上传", "Upload a GGUF model": "上传一个 GGUF 模型", "Upload directory": "上传目录", @@ -1095,7 +1095,7 @@ "Warning:": "警告:", "Warning: Enabling this will allow users to upload arbitrary code on the server.": "警告:启用此功能将允许用户在服务器上上传任意代码。", "Warning: If you update or change your embedding model, you will need to re-import all documents.": "警告:如果您修改了语义向量模型,则需要重新导入所有文档。", - "Warning: Jupyter execution enables arbitrary code execution, posing severe security risks—proceed with extreme caution.": "", + "Warning: Jupyter execution enables arbitrary code execution, posing severe security risks—proceed with extreme caution.": "警告:Jupyter 执行允许任意代码执行,存在严重的安全风险——请极其谨慎地操作。", "Web": "网页", "Web API": "网页 API", 
"Web Loader Settings": "网页爬取设置", From d4743b1a17ed52405ba205c1c19143b2c23ad15e Mon Sep 17 00:00:00 2001 From: KarlLee830 <61072264+KarlLee830@users.noreply.github.com> Date: Tue, 18 Feb 2025 23:42:22 +0800 Subject: [PATCH 14/96] i18n: Update Chinese Translation --- src/lib/i18n/locales/zh-CN/translation.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index 3b7dfb05d..e891396a3 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -282,7 +282,7 @@ "Description": "描述", "Didn't fully follow instructions": "没有完全遵照指示", "Direct Connections": "直接连接", - "Direct Connections allow users to connect to their own OpenAI compatible API endpoints.": "直接连接Direct Connections 功能 允许用户连接至其自有的、兼容 OpenAI 的 API 端点。", + "Direct Connections allow users to connect to their own OpenAI compatible API endpoints.": "直接连接功能允许用户连接至其自有的、兼容 OpenAI 的 API 端点。", "Direct Connections settings updated": "直接连接设置已更新", "Disabled": "禁用", "Discover a function": "发现更多函数", From 55b0ac85d187e043ccadd6d31a28cc66f89ad589 Mon Sep 17 00:00:00 2001 From: juxiang <73006913+juquxiang@users.noreply.github.com> Date: Wed, 19 Feb 2025 00:53:38 +0800 Subject: [PATCH 15/96] Added user filtering by email and username Added user filtering by email and username --- src/lib/components/admin/Users/UserList.svelte | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/components/admin/Users/UserList.svelte b/src/lib/components/admin/Users/UserList.svelte index 3f7832517..7f8e516fb 100644 --- a/src/lib/components/admin/Users/UserList.svelte +++ b/src/lib/components/admin/Users/UserList.svelte @@ -85,8 +85,9 @@ return true; } else { let name = user.name.toLowerCase(); + let email = user.email.toLowerCase(); const query = search.toLowerCase(); - return name.includes(query); + return name.includes(query) || email.includes(query); } }) .sort((a, b) 
=> { From 925bfe840b46df424360f230a93e748289df0139 Mon Sep 17 00:00:00 2001 From: mikhail-khludnev Date: Tue, 18 Feb 2025 16:39:02 +0300 Subject: [PATCH 16/96] dedupe results from multiple queries --- backend/open_webui/retrieval/utils.py | 35 ++++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 437183369..e5ba55878 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -138,37 +138,44 @@ def query_doc_with_hybrid_search( def merge_and_sort_query_results( - query_results: list[dict], k: int, reverse: bool = False + query_results: list[dict], k: int, reverse: bool = False ) -> list[dict]: # Initialize lists to store combined data combined_distances = [] combined_documents = [] combined_metadatas = [] + combined_ids = [] for data in query_results: combined_distances.extend(data["distances"][0]) combined_documents.extend(data["documents"][0]) combined_metadatas.extend(data["metadatas"][0]) + # DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals + combined_ids.extend([id + meta["file_id"] for id, meta in zip(data["ids"][0], data["metadatas"][0])]) - # Create a list of tuples (distance, document, metadata) - combined = list(zip(combined_distances, combined_documents, combined_metadatas)) + # Create a list of tuples (distance, document, metadata, ids) + combined = list(zip(combined_distances, combined_documents, combined_metadatas, combined_ids)) # Sort the list based on distances combined.sort(key=lambda x: x[0], reverse=reverse) - # We don't have anything :-( - if not combined: - sorted_distances = [] - sorted_documents = [] - sorted_metadatas = [] - else: + sorted_distances = [] + sorted_documents = [] + sorted_metadatas = [] + # Otherwise we don't have anything :-( + if combined: # Unzip the sorted list - sorted_distances, sorted_documents, sorted_metadatas = zip(*combined) - + 
all_distances, all_documents, all_metadatas, all_ids = zip(*combined) + seen_ids = set() # Slicing the lists to include only k elements - sorted_distances = list(sorted_distances)[:k] - sorted_documents = list(sorted_documents)[:k] - sorted_metadatas = list(sorted_metadatas)[:k] + for index, id in enumerate(all_ids): + if id not in seen_ids: + sorted_distances.append(all_distances[index]) + sorted_documents.append(all_documents[index]) + sorted_metadatas.append(all_metadatas[index]) + seen_ids.add(id) + if len(sorted_distances) >= k: + break # Create the output dictionary result = { From 6c6be5de886f07c64e170b70865b56718d6809f5 Mon Sep 17 00:00:00 2001 From: Ranjan Mohan Date: Sat, 8 Feb 2025 22:37:24 -0700 Subject: [PATCH 17/96] Fixed an issue with clearing application cookies during OAuth signout Closes #8885. During the OAuth signout flow, although the `token` and `oauth_id_token` cookies were marked for deletion, a new RedirectResponse is created and returned. This does not contain the header info from the Response object used to mark the cookies to be deleted. Hence the cookies remained. Fixed this by re-using the headers from the other Response object. 
--- backend/open_webui/routers/auths.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py index a3f2e8b32..494ba3611 100644 --- a/backend/open_webui/routers/auths.py +++ b/backend/open_webui/routers/auths.py @@ -546,6 +546,7 @@ async def signout(request: Request, response: Response): if logout_url: response.delete_cookie("oauth_id_token") return RedirectResponse( + headers=response.headers, url=f"{logout_url}?id_token_hint={oauth_id_token}" ) else: From 359f8b67f8e9c6c1275fbeac21a4d198adbfd321 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 09:54:31 -0800 Subject: [PATCH 18/96] fix: mobile hover issue --- src/tailwind.css | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tailwind.css b/src/tailwind.css index e6960f2aa..f4e0c0cdd 100644 --- a/src/tailwind.css +++ b/src/tailwind.css @@ -36,3 +36,5 @@ @apply cursor-pointer; } } + +@custom-variant hover (&:hover); From d0114e0703b21c64419ea121309b1a61f33bcf65 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 09:57:12 -0800 Subject: [PATCH 19/96] fix: temp chat issue --- backend/open_webui/utils/middleware.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 93edc8f72..e09c84f96 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -1359,7 +1359,15 @@ async def process_chat_response( tool_calls = [] - last_assistant_message = get_last_assistant_message(form_data["messages"]) + last_assistant_message = None + try: + if form_data["messages"][-1]["role"] == "assistant": + last_assistant_message = get_last_assistant_message( + form_data["messages"] + ) + except Exception as e: + pass + content = ( message.get("content", "") if message From fd3c24af4e1ea80a45a4b26271cb8d08fb12a42a Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: 
Tue, 18 Feb 2025 13:25:31 -0500 Subject: [PATCH 20/96] Add AzureStorageProvider --- backend/open_webui/storage/provider.py | 76 +++++++++++++++++++ .../test/apps/webui/storage/test_provider.py | 4 + backend/requirements.txt | 4 + 3 files changed, 84 insertions(+) diff --git a/backend/open_webui/storage/provider.py b/backend/open_webui/storage/provider.py index b03cf0a7e..43f4a6922 100644 --- a/backend/open_webui/storage/provider.py +++ b/backend/open_webui/storage/provider.py @@ -15,12 +15,19 @@ from open_webui.config import ( S3_SECRET_ACCESS_KEY, GCS_BUCKET_NAME, GOOGLE_APPLICATION_CREDENTIALS_JSON, + AZURE_STORAGE_ENDPOINT, + AZURE_STORAGE_CONTAINER_NAME, + AZURE_STORAGE_KEY, STORAGE_PROVIDER, UPLOAD_DIR, ) from google.cloud import storage from google.cloud.exceptions import GoogleCloudError, NotFound from open_webui.constants import ERROR_MESSAGES +from azure.identity import DefaultAzureCredential +from azure.storage.blob import BlobServiceClient +from azure.core.exceptions import ResourceNotFoundError + class StorageProvider(ABC): @@ -221,6 +228,73 @@ class GCSStorageProvider(StorageProvider): LocalStorageProvider.delete_all_files() + +class AzureStorageProvider(StorageProvider): + def __init__(self): + self.endpoint = AZURE_STORAGE_ENDPOINT + self.container_name = AZURE_STORAGE_CONTAINER_NAME + storage_key = AZURE_STORAGE_KEY + + if storage_key: + # Configure using the Azure Storage Account Endpoint and Key + self.blob_service_client = BlobServiceClient( + account_url=self.endpoint, credential=storage_key + ) + else: + # Configure using the Azure Storage Account Endpoint and DefaultAzureCredential + # If the key is not configured, then the DefaultAzureCredential will be used to support Managed Identity authentication + self.blob_service_client = BlobServiceClient( + account_url=self.endpoint, credential=DefaultAzureCredential() + ) + self.container_client = self.blob_service_client.get_container_client(self.container_name) + + def upload_file(self, file: 
BinaryIO, filename: str) -> Tuple[bytes, str]: + """Handles uploading of the file to Azure Blob Storage.""" + contents, file_path = LocalStorageProvider.upload_file(file, filename) + try: + blob_client = self.container_client.get_blob_client(filename) + blob_client.upload_blob(contents, overwrite=True) + return contents, f"{self.endpoint}/{self.container_name}/{filename}" + except Exception as e: + raise RuntimeError(f"Error uploading file to Azure Blob Storage: {e}") + + def get_file(self, file_path: str) -> str: + """Handles downloading of the file from Azure Blob Storage.""" + try: + filename = file_path.split("/")[-1] + local_file_path = f"{UPLOAD_DIR}/{filename}" + blob_client = self.container_client.get_blob_client(filename) + with open(local_file_path, "wb") as download_file: + download_file.write(blob_client.download_blob().readall()) + return local_file_path + except ResourceNotFoundError as e: + raise RuntimeError(f"Error downloading file from Azure Blob Storage: {e}") + + def delete_file(self, file_path: str) -> None: + """Handles deletion of the file from Azure Blob Storage.""" + try: + filename = file_path.split("/")[-1] + blob_client = self.container_client.get_blob_client(filename) + blob_client.delete_blob() + except ResourceNotFoundError as e: + raise RuntimeError(f"Error deleting file from Azure Blob Storage: {e}") + + # Always delete from local storage + LocalStorageProvider.delete_file(file_path) + + def delete_all_files(self) -> None: + """Handles deletion of all files from Azure Blob Storage.""" + try: + blobs = self.container_client.list_blobs() + for blob in blobs: + self.container_client.delete_blob(blob.name) + except Exception as e: + raise RuntimeError(f"Error deleting all files from Azure Blob Storage: {e}") + + # Always delete from local storage + LocalStorageProvider.delete_all_files() + + def get_storage_provider(storage_provider: str): if storage_provider == "local": Storage = LocalStorageProvider() @@ -228,6 +302,8 @@ def 
get_storage_provider(storage_provider: str): Storage = S3StorageProvider() elif storage_provider == "gcs": Storage = GCSStorageProvider() + elif storage_provider == "azure": + Storage = AzureStorageProvider() else: raise RuntimeError(f"Unsupported storage provider: {storage_provider}") return Storage diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 863106e75..4c3112526 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -22,6 +22,7 @@ def test_imports(): provider.LocalStorageProvider provider.S3StorageProvider provider.GCSStorageProvider + provider.AzureStorageProvider provider.Storage @@ -32,6 +33,8 @@ def test_get_storage_provider(): assert isinstance(Storage, provider.S3StorageProvider) Storage = provider.get_storage_provider("gcs") assert isinstance(Storage, provider.GCSStorageProvider) + Storage = provider.get_storage_provider("azure") + assert isinstance(Storage, provider.AzureStorageProvider) with pytest.raises(RuntimeError): provider.get_storage_provider("invalid") @@ -48,6 +51,7 @@ def test_class_instantiation(): provider.LocalStorageProvider() provider.S3StorageProvider() provider.GCSStorageProvider() + provider.AzureStorageProvider() class TestLocalStorageProvider: diff --git a/backend/requirements.txt b/backend/requirements.txt index 9b859b84a..f8e5f6684 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -103,5 +103,9 @@ pytest-docker~=3.1.1 googleapis-common-protos==1.63.2 google-cloud-storage==2.19.0 +azure-identity==1.20.0 +azure-storage-blob==12.24.1 + + ## LDAP ldap3==2.9.1 From e4febfa0974c6433b97f5e483430a33bb88e9941 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 13:25:49 -0500 Subject: [PATCH 21/96] Add AzureStorageProvider config options --- backend/open_webui/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/backend/open_webui/config.py b/backend/open_webui/config.py index adfdcfec8..6bc2b7636 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -668,6 +668,10 @@ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get( "GOOGLE_APPLICATION_CREDENTIALS_JSON", None ) +AZURE_STORAGE_ENDPOINT = os.environ.get("AZURE_STORAGE_ENDPOINT", None) +AZURE_STORAGE_CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER_NAME", "open-webui") +AZURE_STORAGE_KEY = os.environ.get("AZURE_STORAGE_KEY", None) + #################################### # File Upload DIR #################################### From aee57107bcafac2e7a363c9a9b7b046398c00c55 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 13:27:37 -0500 Subject: [PATCH 22/96] Update config.py --- backend/open_webui/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 6bc2b7636..176511948 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -669,7 +669,7 @@ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get( ) AZURE_STORAGE_ENDPOINT = os.environ.get("AZURE_STORAGE_ENDPOINT", None) -AZURE_STORAGE_CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER_NAME", "open-webui") +AZURE_STORAGE_CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER_NAME", None) AZURE_STORAGE_KEY = os.environ.get("AZURE_STORAGE_KEY", None) #################################### From cc4598c41baea60dbaee2b26eab77d0e489bc876 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 13:39:27 -0500 Subject: [PATCH 23/96] Update build-release.yml --- .github/workflows/build-release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 443d90419..ac2e28130 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -4,6 +4,7 @@ on: push: branches: - main # or whatever branch 
you want to use + - azure-storage jobs: release: From 0b1e30988a2df23ffe89342be8e69ecb9a481a80 Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Tue, 18 Feb 2025 21:08:25 +0200 Subject: [PATCH 24/96] Update MarkdownTokens.svelte More dir="auto" to auto direction on RTL --- .../chat/Messages/Markdown/MarkdownTokens.svelte | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte index 36cac4d17..0c5244882 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte @@ -76,7 +76,7 @@ {#if token.type === 'hr'}
{:else if token.type === 'heading'} - + {:else if token.type === 'code'} @@ -176,7 +176,7 @@ {#if token.ordered}
    {#each token.items as item, itemIdx} -
  1. +
  2. {#if item?.task} {#each token.items as item, itemIdx} -
  3. +
  4. {#if item?.task} {/if} {:else if token.type === 'details'} - +
    Date: Tue, 18 Feb 2025 14:09:00 -0500 Subject: [PATCH 25/96] add tests --- .github/workflows/integration-test.disabled | 1 + .../test/apps/webui/storage/test_provider.py | 100 +++++++++++++++++- 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration-test.disabled b/.github/workflows/integration-test.disabled index b248df4b5..946735c5b 100644 --- a/.github/workflows/integration-test.disabled +++ b/.github/workflows/integration-test.disabled @@ -5,6 +5,7 @@ on: branches: - main - dev + - azure-storage pull_request: branches: - main diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 4c3112526..5cfb1b6a8 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -3,10 +3,11 @@ import os import boto3 import pytest from botocore.exceptions import ClientError -from moto import mock_aws +from moto import mock_aws, mock_azure from open_webui.storage import provider from gcp_storage_emulator.server import create_server from google.cloud import storage +from azure.storage.blob import BlobServiceClient def mock_upload_dir(monkeypatch, tmp_path): @@ -276,3 +277,100 @@ class TestGCSStorageProvider: assert not (upload_dir / self.filename_extra).exists() assert self.Storage.bucket.get_blob(self.filename) == None assert self.Storage.bucket.get_blob(self.filename_extra) == None + + +class TestAzureStorageProvider: + def __init__(self): + self.Storage = provider.AzureStorageProvider() + self.Storage.container_name = "my-container" + self.file_content = b"test content" + self.filename = "test.txt" + self.filename_extra = "test_exyta.txt" + self.file_bytesio_empty = io.BytesIO() + super().__init__() + + @pytest.fixture(scope="class") + def setup(self, monkeypatch): + connection_string = 
"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtl6rE4rWlgEoMF1rA==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + self.Storage.blob_service_client = BlobServiceClient.from_connection_string(connection_string) + self.Storage.container_client = self.Storage.blob_service_client.get_container_client(self.Storage.container_name) + monkeypatch.setattr(self.Storage, "blob_service_client", self.Storage.blob_service_client) + monkeypatch.setattr(self.Storage, "container_client", self.Storage.container_client) + yield + self.Storage.container_client.delete_container() + + def test_upload_file(self, monkeypatch, tmp_path, setup): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + # Azure checks + with pytest.raises(Exception): + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + self.Storage.create_container() + contents, azure_file_path = self.Storage.upload_file( + io.BytesIO(self.file_content), self.filename + ) + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename + ) + assert self.file_content == blob.download_blob().readall() + # local checks + assert (upload_dir / self.filename).exists() + assert (upload_dir / self.filename).read_bytes() == self.file_content + assert contents == self.file_content + assert azure_file_path == "azure://" + self.Storage.container_name + "/" + self.filename + with pytest.raises(ValueError): + self.Storage.upload_file(self.file_bytesio_empty, self.filename) + + def test_get_file(self, monkeypatch, tmp_path, setup): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + contents, azure_file_path = self.Storage.upload_file( + io.BytesIO(self.file_content), self.filename + ) + file_path = self.Storage.get_file(azure_file_path) + assert file_path == str(upload_dir / self.filename) + assert (upload_dir / self.filename).exists() + + def test_delete_file(self, 
monkeypatch, tmp_path, setup): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + contents, azure_file_path = self.Storage.upload_file( + io.BytesIO(self.file_content), self.filename + ) + assert (upload_dir / self.filename).exists() + self.Storage.delete_file(azure_file_path) + assert not (upload_dir / self.filename).exists() + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename + ) + with pytest.raises(Exception): + blob.download_blob().readall() + + def test_delete_all_files(self, monkeypatch, tmp_path, setup): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename + ) + assert self.file_content == blob.download_blob().readall() + assert (upload_dir / self.filename).exists() + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra) + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename_extra + ) + assert self.file_content == blob.download_blob().readall() + assert (upload_dir / self.filename_extra).exists() + + self.Storage.delete_all_files() + assert not (upload_dir / self.filename).exists() + assert not (upload_dir / self.filename_extra).exists() + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename + ) + with pytest.raises(Exception): + blob.download_blob().readall() + blob = self.Storage.blob_service_client.get_blob_client( + container=self.Storage.container_name, blob=self.filename_extra + ) + with pytest.raises(Exception): + blob.download_blob().readall() \ No newline at end of file From cb0666ac216ab5794e4ef795e367929c4dbec91a Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 
18 Feb 2025 14:13:54 -0500 Subject: [PATCH 26/96] update --- .github/workflows/build-release.yml | 1 - .github/workflows/integration-test.disabled | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index ac2e28130..443d90419 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -4,7 +4,6 @@ on: push: branches: - main # or whatever branch you want to use - - azure-storage jobs: release: diff --git a/.github/workflows/integration-test.disabled b/.github/workflows/integration-test.disabled index 946735c5b..b248df4b5 100644 --- a/.github/workflows/integration-test.disabled +++ b/.github/workflows/integration-test.disabled @@ -5,7 +5,6 @@ on: branches: - main - dev - - azure-storage pull_request: branches: - main From 61644f216e5ace539f8de72f691f3ec2c617fb0a Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Tue, 18 Feb 2025 21:30:25 +0200 Subject: [PATCH 27/96] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a61d81f46..358fda15b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.5.15] + +### Added + +- **🌍 Enhanced Internationalization (i18n)**: Improved right-to-left languages experience with automatic text direction handling in chat and sidebar + ## [0.5.14] - 2025-02-17 ### Fixed From b86f8df29f6f2101787350990401b51b0341db22 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:37:10 -0500 Subject: [PATCH 28/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 144 +++++++++++------- 1 file changed, 91 insertions(+), 53 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 5cfb1b6a8..e434523f8 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -7,7 +7,8 @@ from moto import mock_aws, mock_azure from open_webui.storage import provider from gcp_storage_emulator.server import create_server from google.cloud import storage -from azure.storage.blob import BlobServiceClient +from azure.storage.blob import BlobServiceClient, BlobContainerClient, BlobClient +from unittest.mock import MagicMock def mock_upload_dir(monkeypatch, tmp_path): @@ -279,98 +280,135 @@ class TestGCSStorageProvider: assert self.Storage.bucket.get_blob(self.filename_extra) == None + class TestAzureStorageProvider: def __init__(self): self.Storage = provider.AzureStorageProvider() + self.Storage.endpoint = "https://myaccount.blob.core.windows.net" self.Storage.container_name = "my-container" self.file_content = b"test content" self.filename = "test.txt" - self.filename_extra = "test_exyta.txt" + self.filename_extra = "test_extra.txt" self.file_bytesio_empty = io.BytesIO() super().__init__() - @pytest.fixture(scope="class") + @pytest.fixture def setup(self, monkeypatch): - connection_string = 
"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtl6rE4rWlgEoMF1rA==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" - self.Storage.blob_service_client = BlobServiceClient.from_connection_string(connection_string) - self.Storage.container_client = self.Storage.blob_service_client.get_container_client(self.Storage.container_name) - monkeypatch.setattr(self.Storage, "blob_service_client", self.Storage.blob_service_client) - monkeypatch.setattr(self.Storage, "container_client", self.Storage.container_client) + """Mock BlobServiceClient and BlobContainerClient for local testing""" + # Create mock Blob Service Client + mock_blob_service_client = MagicMock() + mock_container_client = MagicMock() + mock_blob_client = MagicMock() + + # Set up return values + mock_blob_service_client.get_container_client.return_value = mock_container_client + mock_container_client.get_blob_client.return_value = mock_blob_client + + # Mock `from_connection_string` and `BlobServiceClient` constructor + monkeypatch.setattr("azure.storage.blob.BlobServiceClient", lambda *_: mock_blob_service_client) + + # Apply to instance variables + self.Storage.blob_service_client = mock_blob_service_client + self.Storage.container_client = mock_container_client + yield - self.Storage.container_client.delete_container() def test_upload_file(self, monkeypatch, tmp_path, setup): + """Test uploading a file to mocked Azure Storage.""" upload_dir = mock_upload_dir(monkeypatch, tmp_path) - # Azure checks + + # Simulate an error when container does not exist + self.Storage.container_client.get_blob_client.side_effect = Exception("Container does not exist") + with pytest.raises(Exception): self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + + # Reset side effect and create container + self.Storage.container_client.get_blob_client.side_effect = None + self.Storage.create_container() contents, azure_file_path = self.Storage.upload_file( 
io.BytesIO(self.file_content), self.filename ) - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename - ) - assert self.file_content == blob.download_blob().readall() - # local checks + + # Assertions + self.Storage.container_client.get_blob_client.assert_called_with(self.filename) + self.Storage.container_client.get_blob_client().upload_blob.assert_called_once_with(self.file_content, overwrite=True) + + assert contents == self.file_content + assert azure_file_path == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" assert (upload_dir / self.filename).exists() assert (upload_dir / self.filename).read_bytes() == self.file_content - assert contents == self.file_content - assert azure_file_path == "azure://" + self.Storage.container_name + "/" + self.filename + with pytest.raises(ValueError): self.Storage.upload_file(self.file_bytesio_empty, self.filename) def test_get_file(self, monkeypatch, tmp_path, setup): + """Test retrieving a file from mocked Azure Storage.""" upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() - contents, azure_file_path = self.Storage.upload_file( - io.BytesIO(self.file_content), self.filename - ) - file_path = self.Storage.get_file(azure_file_path) + + # Mock upload behavior + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + + # Mock blob download behavior + self.Storage.container_client.get_blob_client().download_blob().readall.return_value = self.file_content + + file_path = self.Storage.get_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") + assert file_path == str(upload_dir / self.filename) assert (upload_dir / self.filename).exists() + assert (upload_dir / self.filename).read_bytes() == self.file_content def test_delete_file(self, monkeypatch, tmp_path, setup): + """Test deleting a file from mocked Azure Storage.""" upload_dir = 
mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() - contents, azure_file_path = self.Storage.upload_file( - io.BytesIO(self.file_content), self.filename - ) - assert (upload_dir / self.filename).exists() - self.Storage.delete_file(azure_file_path) + + # Mock upload + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + + # Mock deletion + self.Storage.container_client.get_blob_client().delete_blob.return_value = None + + self.Storage.delete_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") + + # Assertions + self.Storage.container_client.get_blob_client().delete_blob.assert_called_once() assert not (upload_dir / self.filename).exists() - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename - ) - with pytest.raises(Exception): - blob.download_blob().readall() def test_delete_all_files(self, monkeypatch, tmp_path, setup): + """Test deleting all files from mocked Azure Storage.""" upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() - self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename - ) - assert self.file_content == blob.download_blob().readall() - assert (upload_dir / self.filename).exists() - self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra) - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename_extra - ) - assert self.file_content == blob.download_blob().readall() - assert (upload_dir / self.filename_extra).exists() + # Mock file uploads + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra) + + # Mock listing and deletion behavior + 
self.Storage.container_client.list_blobs.return_value = [ + {"name": self.filename}, + {"name": self.filename_extra}, + ] + self.Storage.container_client.get_blob_client().delete_blob.return_value = None + + # Call delete all files self.Storage.delete_all_files() + + # Assertions + self.Storage.container_client.list_blobs.assert_called_once() + self.Storage.container_client.get_blob_client().delete_blob.assert_any_call() assert not (upload_dir / self.filename).exists() assert not (upload_dir / self.filename_extra).exists() - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename - ) - with pytest.raises(Exception): - blob.download_blob().readall() - blob = self.Storage.blob_service_client.get_blob_client( - container=self.Storage.container_name, blob=self.filename_extra - ) - with pytest.raises(Exception): - blob.download_blob().readall() \ No newline at end of file + + def test_get_file_not_found(self, monkeypatch, setup): + """Test handling when a requested file does not exist.""" + self.Storage.create_container() + + # Mock behavior to raise an error for missing files + self.Storage.container_client.get_blob_client().download_blob.side_effect = Exception("Blob not found") + + with pytest.raises(Exception, match="Blob not found"): + self.Storage.get_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") + From 4a9a88b683e0da71dd71a96fc1fb19564f9a053b Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:41:42 -0500 Subject: [PATCH 29/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index e434523f8..6770add7b 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ 
b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -305,6 +305,7 @@ class TestAzureStorageProvider: mock_container_client.get_blob_client.return_value = mock_blob_client # Mock `from_connection_string` and `BlobServiceClient` constructor + monkeypatch.setattr(provider, "BlobServiceClient", lambda *_: mock_blob_service_client) monkeypatch.setattr("azure.storage.blob.BlobServiceClient", lambda *_: mock_blob_service_client) # Apply to instance variables From f674e28263574f33a920c79d282389f0364825d2 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:45:13 -0500 Subject: [PATCH 30/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 6770add7b..2a8655027 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -292,7 +292,7 @@ class TestAzureStorageProvider: self.file_bytesio_empty = io.BytesIO() super().__init__() - @pytest.fixture + @pytest.fixture(scope="class") def setup(self, monkeypatch): """Mock BlobServiceClient and BlobContainerClient for local testing""" # Create mock Blob Service Client From 9a2e81f5f01045b424a5b7b30118e2ebbbabc45e Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:49:03 -0500 Subject: [PATCH 31/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 2a8655027..2ed2c7b0f 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -283,13 +283,6 @@ class 
TestGCSStorageProvider: class TestAzureStorageProvider: def __init__(self): - self.Storage = provider.AzureStorageProvider() - self.Storage.endpoint = "https://myaccount.blob.core.windows.net" - self.Storage.container_name = "my-container" - self.file_content = b"test content" - self.filename = "test.txt" - self.filename_extra = "test_extra.txt" - self.file_bytesio_empty = io.BytesIO() super().__init__() @pytest.fixture(scope="class") @@ -305,12 +298,21 @@ class TestAzureStorageProvider: mock_container_client.get_blob_client.return_value = mock_blob_client # Mock `from_connection_string` and `BlobServiceClient` constructor - monkeypatch.setattr(provider, "BlobServiceClient", lambda *_: mock_blob_service_client) monkeypatch.setattr("azure.storage.blob.BlobServiceClient", lambda *_: mock_blob_service_client) + + + self.Storage = provider.AzureStorageProvider() + self.Storage.endpoint = "https://myaccount.blob.core.windows.net" + self.Storage.container_name = "my-container" + self.file_content = b"test content" + self.filename = "test.txt" + self.filename_extra = "test_extra.txt" + self.file_bytesio_empty = io.BytesIO() + # Apply to instance variables - self.Storage.blob_service_client = mock_blob_service_client - self.Storage.container_client = mock_container_client + #self.Storage.blob_service_client = mock_blob_service_client + #self.Storage.container_client = mock_container_client yield From ff5f0c3e3965a5ef44decba7382052548e442a4b Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:51:17 -0500 Subject: [PATCH 32/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 2ed2c7b0f..23d020c13 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ 
b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -311,8 +311,8 @@ class TestAzureStorageProvider: self.file_bytesio_empty = io.BytesIO() # Apply to instance variables - #self.Storage.blob_service_client = mock_blob_service_client - #self.Storage.container_client = mock_container_client + self.Storage.blob_service_client = mock_blob_service_client + self.Storage.container_client = mock_container_client yield From 7d1ec2042905513fbd944f8407f6b1bb3ecd8e64 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 14:57:51 -0500 Subject: [PATCH 33/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 23d020c13..2bc3ccb64 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -298,9 +298,9 @@ class TestAzureStorageProvider: mock_container_client.get_blob_client.return_value = mock_blob_client # Mock `from_connection_string` and `BlobServiceClient` constructor - monkeypatch.setattr("azure.storage.blob.BlobServiceClient", lambda *_: mock_blob_service_client) - - + monkeypatch.setattr(azure.storage.blob.BlobServiceClient, lambda *_: mock_blob_service_client) + monkeypatch.setattr(azure.storage.blob.BlobContainerClient, lambda *_: mock_container_client) + monkeypatch.setattr(azure.storage.blob.BlobClient, lambda *_: mock_blob_client) self.Storage = provider.AzureStorageProvider() self.Storage.endpoint = "https://myaccount.blob.core.windows.net" From 56060db29de3287fdf7df124bce2e44f8fef743e Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:01:26 -0500 Subject: [PATCH 34/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 94 ++++++++----------- 1 file changed, 41 insertions(+), 
53 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 2bc3ccb64..08338375e 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -282,26 +282,8 @@ class TestGCSStorageProvider: class TestAzureStorageProvider: - def __init__(self): - super().__init__() - - @pytest.fixture(scope="class") - def setup(self, monkeypatch): - """Mock BlobServiceClient and BlobContainerClient for local testing""" - # Create mock Blob Service Client - mock_blob_service_client = MagicMock() - mock_container_client = MagicMock() - mock_blob_client = MagicMock() - - # Set up return values - mock_blob_service_client.get_container_client.return_value = mock_container_client - mock_container_client.get_blob_client.return_value = mock_blob_client - - # Mock `from_connection_string` and `BlobServiceClient` constructor - monkeypatch.setattr(azure.storage.blob.BlobServiceClient, lambda *_: mock_blob_service_client) - monkeypatch.setattr(azure.storage.blob.BlobContainerClient, lambda *_: mock_container_client) - monkeypatch.setattr(azure.storage.blob.BlobClient, lambda *_: mock_blob_client) - + @pytest.fixture(autouse=True) + def setup_storage(self, monkeypatch): self.Storage = provider.AzureStorageProvider() self.Storage.endpoint = "https://myaccount.blob.core.windows.net" self.Storage.container_name = "my-container" @@ -310,34 +292,48 @@ class TestAzureStorageProvider: self.filename_extra = "test_extra.txt" self.file_bytesio_empty = io.BytesIO() - # Apply to instance variables + # Create mock Blob Service Client and related clients + mock_blob_service_client = MagicMock() + mock_container_client = MagicMock() + mock_blob_client = MagicMock() + + # Set up return values for the mock + mock_blob_service_client.get_container_client.return_value = mock_container_client + 
mock_container_client.get_blob_client.return_value = mock_blob_client + + # Monkeypatch the Azure classes to return our mocks + monkeypatch.setattr( + azure.storage.blob, "BlobServiceClient", lambda *args, **kwargs: mock_blob_service_client + ) + monkeypatch.setattr( + azure.storage.blob, "BlobContainerClient", lambda *args, **kwargs: mock_container_client + ) + monkeypatch.setattr( + azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client + ) + + # Apply mocks to the Storage instance self.Storage.blob_service_client = mock_blob_service_client self.Storage.container_client = mock_container_client - yield - - def test_upload_file(self, monkeypatch, tmp_path, setup): - """Test uploading a file to mocked Azure Storage.""" + def test_upload_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) - + # Simulate an error when container does not exist self.Storage.container_client.get_blob_client.side_effect = Exception("Container does not exist") - with pytest.raises(Exception): self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) # Reset side effect and create container self.Storage.container_client.get_blob_client.side_effect = None - self.Storage.create_container() - contents, azure_file_path = self.Storage.upload_file( - io.BytesIO(self.file_content), self.filename - ) + contents, azure_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) # Assertions self.Storage.container_client.get_blob_client.assert_called_with(self.filename) - self.Storage.container_client.get_blob_client().upload_blob.assert_called_once_with(self.file_content, overwrite=True) - + self.Storage.container_client.get_blob_client().upload_blob.assert_called_once_with( + self.file_content, overwrite=True + ) assert contents == self.file_content assert azure_file_path == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" assert (upload_dir / self.filename).exists() @@ 
-346,42 +342,38 @@ class TestAzureStorageProvider: with pytest.raises(ValueError): self.Storage.upload_file(self.file_bytesio_empty, self.filename) - def test_get_file(self, monkeypatch, tmp_path, setup): - """Test retrieving a file from mocked Azure Storage.""" + def test_get_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() # Mock upload behavior self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) - # Mock blob download behavior self.Storage.container_client.get_blob_client().download_blob().readall.return_value = self.file_content - file_path = self.Storage.get_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + file_path = self.Storage.get_file(file_url) assert file_path == str(upload_dir / self.filename) assert (upload_dir / self.filename).exists() assert (upload_dir / self.filename).read_bytes() == self.file_content - def test_delete_file(self, monkeypatch, tmp_path, setup): - """Test deleting a file from mocked Azure Storage.""" + def test_delete_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() - # Mock upload + # Mock file upload self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) - # Mock deletion self.Storage.container_client.get_blob_client().delete_blob.return_value = None - self.Storage.delete_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + self.Storage.delete_file(file_url) - # Assertions self.Storage.container_client.get_blob_client().delete_blob.assert_called_once() assert not (upload_dir / self.filename).exists() - def test_delete_all_files(self, monkeypatch, tmp_path, setup): - 
"""Test deleting all files from mocked Azure Storage.""" + def test_delete_all_files(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() @@ -396,22 +388,18 @@ class TestAzureStorageProvider: ] self.Storage.container_client.get_blob_client().delete_blob.return_value = None - # Call delete all files self.Storage.delete_all_files() - # Assertions self.Storage.container_client.list_blobs.assert_called_once() self.Storage.container_client.get_blob_client().delete_blob.assert_any_call() assert not (upload_dir / self.filename).exists() assert not (upload_dir / self.filename_extra).exists() - def test_get_file_not_found(self, monkeypatch, setup): - """Test handling when a requested file does not exist.""" + def test_get_file_not_found(self, monkeypatch): self.Storage.create_container() - # Mock behavior to raise an error for missing files + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + # Mock behavior to raise an error for missing blobs self.Storage.container_client.get_blob_client().download_blob.side_effect = Exception("Blob not found") - with pytest.raises(Exception, match="Blob not found"): - self.Storage.get_file(f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}") - + self.Storage.get_file(file_url) \ No newline at end of file From 2c328cc7c9b9b928ed660864208265bad8034538 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:04:49 -0500 Subject: [PATCH 35/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 08338375e..75a105bc8 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ 
b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -284,14 +284,6 @@ class TestGCSStorageProvider: class TestAzureStorageProvider: @pytest.fixture(autouse=True) def setup_storage(self, monkeypatch): - self.Storage = provider.AzureStorageProvider() - self.Storage.endpoint = "https://myaccount.blob.core.windows.net" - self.Storage.container_name = "my-container" - self.file_content = b"test content" - self.filename = "test.txt" - self.filename_extra = "test_extra.txt" - self.file_bytesio_empty = io.BytesIO() - # Create mock Blob Service Client and related clients mock_blob_service_client = MagicMock() mock_container_client = MagicMock() @@ -312,6 +304,15 @@ class TestAzureStorageProvider: azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client ) + + self.Storage = provider.AzureStorageProvider() + self.Storage.endpoint = "https://myaccount.blob.core.windows.net" + self.Storage.container_name = "my-container" + self.file_content = b"test content" + self.filename = "test.txt" + self.filename_extra = "test_extra.txt" + self.file_bytesio_empty = io.BytesIO() + # Apply mocks to the Storage instance self.Storage.blob_service_client = mock_blob_service_client self.Storage.container_client = mock_container_client @@ -402,4 +403,4 @@ class TestAzureStorageProvider: # Mock behavior to raise an error for missing blobs self.Storage.container_client.get_blob_client().download_blob.side_effect = Exception("Blob not found") with pytest.raises(Exception, match="Blob not found"): - self.Storage.get_file(file_url) \ No newline at end of file + self.Storage.get_file(file_url) From 9c8c837ab96dbed4b1e6eb3d0135471486179e65 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:12:35 -0500 Subject: [PATCH 36/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py 
b/backend/open_webui/test/apps/webui/storage/test_provider.py index 75a105bc8..3bb71735c 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -328,7 +328,9 @@ class TestAzureStorageProvider: # Reset side effect and create container self.Storage.container_client.get_blob_client.side_effect = None self.Storage.create_container() - contents, azure_file_path = self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + contents, azure_file_path = self.Storage.upload_file( + io.BytesIO(self.file_content), self.filename + ) # Assertions self.Storage.container_client.get_blob_client.assert_called_with(self.filename) @@ -336,7 +338,10 @@ class TestAzureStorageProvider: self.file_content, overwrite=True ) assert contents == self.file_content - assert azure_file_path == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + assert ( + azure_file_path + == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + ) assert (upload_dir / self.filename).exists() assert (upload_dir / self.filename).read_bytes() == self.file_content @@ -350,7 +355,9 @@ class TestAzureStorageProvider: # Mock upload behavior self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) # Mock blob download behavior - self.Storage.container_client.get_blob_client().download_blob().readall.return_value = self.file_content + self.Storage.container_client.get_blob_client().download_blob().readall.return_value = ( + self.file_content + ) file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" file_path = self.Storage.get_file(file_url) @@ -401,6 +408,8 @@ class TestAzureStorageProvider: file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" # Mock behavior to raise an error for missing blobs - 
self.Storage.container_client.get_blob_client().download_blob.side_effect = Exception("Blob not found") + self.Storage.container_client.get_blob_client().download_blob.side_effect = ( + Exception("Blob not found") + ) with pytest.raises(Exception, match="Blob not found"): self.Storage.get_file(file_url) From 55bd7a1c6588ec04b76869c4bcbd96183e5059c4 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:13:30 -0500 Subject: [PATCH 37/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 3bb71735c..77bbcc5e2 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -290,8 +290,12 @@ class TestAzureStorageProvider: mock_blob_client = MagicMock() # Set up return values for the mock - mock_blob_service_client.get_container_client.return_value = mock_container_client - mock_container_client.get_blob_client.return_value = mock_blob_client + mock_blob_service_client.get_container_client.return_value = ( + mock_container_client + ) + mock_container_client.get_blob_client.return_value = ( + mock_blob_client + ) # Monkeypatch the Azure classes to return our mocks monkeypatch.setattr( @@ -321,7 +325,9 @@ class TestAzureStorageProvider: upload_dir = mock_upload_dir(monkeypatch, tmp_path) # Simulate an error when container does not exist - self.Storage.container_client.get_blob_client.side_effect = Exception("Container does not exist") + self.Storage.container_client.get_blob_client.side_effect = Exception( + "Container does not exist" + ) with pytest.raises(Exception): self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) From 4c352ff9747280f56d03156f4532566a2fa05dd8 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 
2025 15:14:46 -0500 Subject: [PATCH 38/96] Update test_provider.py --- .../test/apps/webui/storage/test_provider.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 77bbcc5e2..c92dd176b 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -293,19 +293,23 @@ class TestAzureStorageProvider: mock_blob_service_client.get_container_client.return_value = ( mock_container_client ) - mock_container_client.get_blob_client.return_value = ( - mock_blob_client - ) + mock_container_client.get_blob_client.return_value = mock_blob_client # Monkeypatch the Azure classes to return our mocks monkeypatch.setattr( - azure.storage.blob, "BlobServiceClient", lambda *args, **kwargs: mock_blob_service_client + azure.storage.blob, + "BlobServiceClient", + lambda *args, **kwargs: mock_blob_service_client ) monkeypatch.setattr( - azure.storage.blob, "BlobContainerClient", lambda *args, **kwargs: mock_container_client + azure.storage.blob, + "BlobContainerClient", + lambda *args, **kwargs: mock_container_client ) monkeypatch.setattr( - azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client + azure.storage.blob, + "BlobClient", + lambda *args, **kwargs: mock_blob_client ) From a29f83c4e77452416deb28cf6a09632ea0a8606f Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:17:49 -0500 Subject: [PATCH 39/96] updates to formatting --- backend/open_webui/storage/provider.py | 7 ++++--- .../open_webui/test/apps/webui/storage/test_provider.py | 8 +++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/backend/open_webui/storage/provider.py b/backend/open_webui/storage/provider.py index 43f4a6922..ae119e39b 100644 --- a/backend/open_webui/storage/provider.py +++ 
b/backend/open_webui/storage/provider.py @@ -228,7 +228,6 @@ class GCSStorageProvider(StorageProvider): LocalStorageProvider.delete_all_files() - class AzureStorageProvider(StorageProvider): def __init__(self): self.endpoint = AZURE_STORAGE_ENDPOINT @@ -246,7 +245,9 @@ class AzureStorageProvider(StorageProvider): self.blob_service_client = BlobServiceClient( account_url=self.endpoint, credential=DefaultAzureCredential() ) - self.container_client = self.blob_service_client.get_container_client(self.container_name) + self.container_client = self.blob_service_client.get_container_client( + self.container_name + ) def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]: """Handles uploading of the file to Azure Blob Storage.""" @@ -293,7 +294,7 @@ class AzureStorageProvider(StorageProvider): # Always delete from local storage LocalStorageProvider.delete_all_files() - + def get_storage_provider(storage_provider: str): if storage_provider == "local": diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index c92dd176b..0d5d81f96 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -299,17 +299,15 @@ class TestAzureStorageProvider: monkeypatch.setattr( azure.storage.blob, "BlobServiceClient", - lambda *args, **kwargs: mock_blob_service_client + lambda *args, **kwargs: mock_blob_service_client, ) monkeypatch.setattr( azure.storage.blob, "BlobContainerClient", - lambda *args, **kwargs: mock_container_client + lambda *args, **kwargs: mock_container_client, ) monkeypatch.setattr( - azure.storage.blob, - "BlobClient", - lambda *args, **kwargs: mock_blob_client + azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client ) From 7404494772bb9fee85dfafa2e91e5ee1d8818e6d Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:19:35 -0500 Subject: [PATCH 40/96] 
formatting --- backend/open_webui/storage/provider.py | 1 - backend/open_webui/test/apps/webui/storage/test_provider.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/backend/open_webui/storage/provider.py b/backend/open_webui/storage/provider.py index ae119e39b..160a45153 100644 --- a/backend/open_webui/storage/provider.py +++ b/backend/open_webui/storage/provider.py @@ -29,7 +29,6 @@ from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError - class StorageProvider(ABC): @abstractmethod def get_file(self, file_path: str) -> str: diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 0d5d81f96..e9b34aa18 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -280,7 +280,6 @@ class TestGCSStorageProvider: assert self.Storage.bucket.get_blob(self.filename_extra) == None - class TestAzureStorageProvider: @pytest.fixture(autouse=True) def setup_storage(self, monkeypatch): @@ -310,7 +309,6 @@ class TestAzureStorageProvider: azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client ) - self.Storage = provider.AzureStorageProvider() self.Storage.endpoint = "https://myaccount.blob.core.windows.net" self.Storage.container_name = "my-container" From 7b5f82ffc783cf3c906954bec1fe18a35a6eb5a2 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:26:04 -0500 Subject: [PATCH 41/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index e9b34aa18..c9c63a008 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ 
-3,7 +3,7 @@ import os import boto3 import pytest from botocore.exceptions import ClientError -from moto import mock_aws, mock_azure +from moto import mock_aws from open_webui.storage import provider from gcp_storage_emulator.server import create_server from google.cloud import storage From 888ae008679b16d3730ac972a6f0f3102d1f20fe Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:39:47 -0500 Subject: [PATCH 42/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index c9c63a008..08e8acb3e 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -7,7 +7,7 @@ from moto import mock_aws from open_webui.storage import provider from gcp_storage_emulator.server import create_server from google.cloud import storage -from azure.storage.blob import BlobServiceClient, BlobContainerClient, BlobClient +from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient from unittest.mock import MagicMock @@ -302,7 +302,7 @@ class TestAzureStorageProvider: ) monkeypatch.setattr( azure.storage.blob, - "BlobContainerClient", + "ContainerClient", lambda *args, **kwargs: mock_container_client, ) monkeypatch.setattr( @@ -354,6 +354,8 @@ class TestAzureStorageProvider: with pytest.raises(ValueError): self.Storage.upload_file(self.file_bytesio_empty, self.filename) + assert (true == false).equals(true) + def test_get_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() From 9864185b57a0810155e5cbb9ecf3f9fe281dc8b2 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:49:44 -0500 Subject: [PATCH 43/96] Update test_provider.py --- 
backend/open_webui/test/apps/webui/storage/test_provider.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index 08e8acb3e..accbb05d1 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -281,7 +281,10 @@ class TestGCSStorageProvider: class TestAzureStorageProvider: - @pytest.fixture(autouse=True) + def __init__(self): + super().__init__() + + @pytest.fixture(scope="class") def setup_storage(self, monkeypatch): # Create mock Blob Service Client and related clients mock_blob_service_client = MagicMock() @@ -354,7 +357,6 @@ class TestAzureStorageProvider: with pytest.raises(ValueError): self.Storage.upload_file(self.file_bytesio_empty, self.filename) - assert (true == false).equals(true) def test_get_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) From a232f1f34ee5d931fc1d803b526bcdae8b0b9396 Mon Sep 17 00:00:00 2001 From: Chris Pietschmann Date: Tue, 18 Feb 2025 15:53:54 -0500 Subject: [PATCH 44/96] Update test_provider.py --- backend/open_webui/test/apps/webui/storage/test_provider.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py index accbb05d1..a5ef13504 100644 --- a/backend/open_webui/test/apps/webui/storage/test_provider.py +++ b/backend/open_webui/test/apps/webui/storage/test_provider.py @@ -357,7 +357,6 @@ class TestAzureStorageProvider: with pytest.raises(ValueError): self.Storage.upload_file(self.file_bytesio_empty, self.filename) - def test_get_file(self, monkeypatch, tmp_path): upload_dir = mock_upload_dir(monkeypatch, tmp_path) self.Storage.create_container() From e56b5c063ca72fb5fa3f37965730b63e078a42bd Mon Sep 17 00:00:00 2001 From: JoaoCostaIFG Date: Tue, 18 
Feb 2025 22:39:32 +0000 Subject: [PATCH 45/96] feat: add Google Imagen/Gemini API image generation Adds support for Gemini API as an image generation backend. By setting the API Base URL to something like 'https://generativelanguage.googleapis.com/v1beta' and providing their API Key, users should be able to start generating images using models like 'imagen-3.0-generate-002'. --- backend/open_webui/config.py | 14 +++++ backend/open_webui/main.py | 5 ++ backend/open_webui/routers/images.py | 63 +++++++++++++++++++ .../components/admin/Settings/Images.svelte | 22 +++++++ 4 files changed, 104 insertions(+) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index adfdcfec8..0b147cf12 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -767,6 +767,9 @@ ENABLE_OPENAI_API = PersistentConfig( OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") OPENAI_API_BASE_URL = os.environ.get("OPENAI_API_BASE_URL", "") +GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "") +GEMINI_API_BASE_URL = os.environ.get("GEMINI_API_BASE_URL", "") + if OPENAI_API_BASE_URL == "": OPENAI_API_BASE_URL = "https://api.openai.com/v1" @@ -2064,6 +2067,17 @@ IMAGES_OPENAI_API_KEY = PersistentConfig( os.getenv("IMAGES_OPENAI_API_KEY", OPENAI_API_KEY), ) +IMAGES_GEMINI_API_BASE_URL = PersistentConfig( + "IMAGES_GEMINI_API_BASE_URL", + "image_generation.gemini.api_base_url", + os.getenv("IMAGES_GEMINI_API_BASE_URL", GEMINI_API_BASE_URL), +) +IMAGES_GEMINI_API_KEY = PersistentConfig( + "IMAGES_GEMINI_API_KEY", + "image_generation.gemini.api_key", + os.getenv("IMAGES_GEMINI_API_KEY", GEMINI_API_KEY), +) + IMAGE_SIZE = PersistentConfig( "IMAGE_SIZE", "image_generation.size", os.getenv("IMAGE_SIZE", "512x512") ) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a36323151..22b0526da 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -125,6 +125,8 @@ from open_webui.config import ( IMAGE_STEPS, 
IMAGES_OPENAI_API_BASE_URL, IMAGES_OPENAI_API_KEY, + IMAGES_GEMINI_API_BASE_URL, + IMAGES_GEMINI_API_KEY, # Audio AUDIO_STT_ENGINE, AUDIO_STT_MODEL, @@ -631,6 +633,9 @@ app.state.config.ENABLE_IMAGE_PROMPT_GENERATION = ENABLE_IMAGE_PROMPT_GENERATION app.state.config.IMAGES_OPENAI_API_BASE_URL = IMAGES_OPENAI_API_BASE_URL app.state.config.IMAGES_OPENAI_API_KEY = IMAGES_OPENAI_API_KEY +app.state.config.IMAGES_GEMINI_API_BASE_URL = IMAGES_GEMINI_API_BASE_URL +app.state.config.IMAGES_GEMINI_API_KEY = IMAGES_GEMINI_API_KEY + app.state.config.IMAGE_GENERATION_MODEL = IMAGE_GENERATION_MODEL app.state.config.AUTOMATIC1111_BASE_URL = AUTOMATIC1111_BASE_URL diff --git a/backend/open_webui/routers/images.py b/backend/open_webui/routers/images.py index 4046773de..4c68442b7 100644 --- a/backend/open_webui/routers/images.py +++ b/backend/open_webui/routers/images.py @@ -55,6 +55,10 @@ async def get_config(request: Request, user=Depends(get_admin_user)): "COMFYUI_WORKFLOW": request.app.state.config.COMFYUI_WORKFLOW, "COMFYUI_WORKFLOW_NODES": request.app.state.config.COMFYUI_WORKFLOW_NODES, }, + "gemini": { + "GEMINI_API_BASE_URL": request.app.state.config.IMAGES_GEMINI_API_BASE_URL, + "GEMINI_API_KEY": request.app.state.config.IMAGES_GEMINI_API_KEY, + }, } @@ -78,6 +82,11 @@ class ComfyUIConfigForm(BaseModel): COMFYUI_WORKFLOW_NODES: list[dict] +class GeminiConfigForm(BaseModel): + GEMINI_API_BASE_URL: str + GEMINI_API_KEY: str + + class ConfigForm(BaseModel): enabled: bool engine: str @@ -85,6 +94,7 @@ class ConfigForm(BaseModel): openai: OpenAIConfigForm automatic1111: Automatic1111ConfigForm comfyui: ComfyUIConfigForm + gemini: GeminiConfigForm @router.post("/config/update") @@ -103,6 +113,11 @@ async def update_config( ) request.app.state.config.IMAGES_OPENAI_API_KEY = form_data.openai.OPENAI_API_KEY + request.app.state.config.IMAGES_GEMINI_API_BASE_URL = ( + form_data.gemini.GEMINI_API_BASE_URL + ) + request.app.state.config.IMAGES_GEMINI_API_KEY = 
form_data.gemini.GEMINI_API_KEY + request.app.state.config.AUTOMATIC1111_BASE_URL = ( form_data.automatic1111.AUTOMATIC1111_BASE_URL ) @@ -155,6 +170,10 @@ async def update_config( "COMFYUI_WORKFLOW": request.app.state.config.COMFYUI_WORKFLOW, "COMFYUI_WORKFLOW_NODES": request.app.state.config.COMFYUI_WORKFLOW_NODES, }, + "gemini": { + "GEMINI_API_BASE_URL": request.app.state.config.IMAGES_GEMINI_API_BASE_URL, + "GEMINI_API_KEY": request.app.state.config.IMAGES_GEMINI_API_KEY, + }, } @@ -224,6 +243,12 @@ def get_image_model(request): if request.app.state.config.IMAGE_GENERATION_MODEL else "dall-e-2" ) + elif request.app.state.config.IMAGE_GENERATION_ENGINE == "gemini": + return ( + request.app.state.config.IMAGE_GENERATION_MODEL + if request.app.state.config.IMAGE_GENERATION_MODEL + else "imagen-3.0-generate-002" + ) elif request.app.state.config.IMAGE_GENERATION_ENGINE == "comfyui": return ( request.app.state.config.IMAGE_GENERATION_MODEL @@ -299,6 +324,10 @@ def get_models(request: Request, user=Depends(get_verified_user)): {"id": "dall-e-2", "name": "DALL·E 2"}, {"id": "dall-e-3", "name": "DALL·E 3"}, ] + elif request.app.state.config.IMAGE_GENERATION_ENGINE == "gemini": + return [ + {"id": "imagen-3-0-generate-002", "name": "imagen-3.0 generate-002"}, + ] elif request.app.state.config.IMAGE_GENERATION_ENGINE == "comfyui": # TODO - get models from comfyui headers = { @@ -483,6 +512,40 @@ async def image_generations( images.append({"url": url}) return images + elif request.app.state.config.IMAGE_GENERATION_ENGINE == "gemini": + headers = {} + headers["Content-Type"] = "application/json" + api_key = request.app.state.config.IMAGES_GEMINI_API_KEY + model = get_image_model(request) + data = { + "instances": {"prompt": form_data.prompt}, + "parameters": { + "sampleCount": form_data.n, + "outputOptions": {"mimeType": "image/png"}, + }, + } + + # Use asyncio.to_thread for the requests.post call + r = await asyncio.to_thread( + requests.post, + 
url=f"{request.app.state.config.IMAGES_GEMINI_API_BASE_URL}/models/{model}:predict?key={api_key}", + json=data, + headers=headers, + ) + + r.raise_for_status() + res = r.json() + + images = [] + for image in res["predictions"]: + image_data, content_type = load_b64_image_data( + image["bytesBase64Encoded"] + ) + url = upload_image(request, data, image_data, content_type, user) + images.append({"url": url}) + + return images + elif request.app.state.config.IMAGE_GENERATION_ENGINE == "comfyui": data = { "prompt": form_data.prompt, diff --git a/src/lib/components/admin/Settings/Images.svelte b/src/lib/components/admin/Settings/Images.svelte index 4277c4ebd..590886702 100644 --- a/src/lib/components/admin/Settings/Images.svelte +++ b/src/lib/components/admin/Settings/Images.svelte @@ -261,6 +261,9 @@ } else if (config.engine === 'openai' && config.openai.OPENAI_API_KEY === '') { toast.error($i18n.t('OpenAI API Key is required.')); config.enabled = false; + } else if (config.engine === 'gemini' && config.gemini.GEMINI_API_KEY === '') { + toast.error($i18n.t('Gemini API Key is required.')); + config.enabled = false; } } @@ -294,6 +297,7 @@ +
    @@ -605,6 +609,24 @@ /> + {:else if config?.engine === 'gemini'} +
    +
    {$i18n.t('Gemini API Config')}
    + +
    + + + +
    +
    {/if} From 918764a4f7093ec0838ceeff358356e0942978e6 Mon Sep 17 00:00:00 2001 From: JoaoCostaIFG Date: Wed, 19 Feb 2025 00:00:54 +0000 Subject: [PATCH 46/96] fix: Use x-goog-api-key header for Gemini image generation Place the API key in a header instead of a query parameter. This avoids leaking the API key in logs on request failure, etc... --- backend/open_webui/routers/images.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/images.py b/backend/open_webui/routers/images.py index 4c68442b7..3288ec6d8 100644 --- a/backend/open_webui/routers/images.py +++ b/backend/open_webui/routers/images.py @@ -515,7 +515,8 @@ async def image_generations( elif request.app.state.config.IMAGE_GENERATION_ENGINE == "gemini": headers = {} headers["Content-Type"] = "application/json" - api_key = request.app.state.config.IMAGES_GEMINI_API_KEY + headers["x-goog-api-key"] = request.app.state.config.IMAGES_GEMINI_API_KEY + model = get_image_model(request) data = { "instances": {"prompt": form_data.prompt}, @@ -528,7 +529,7 @@ async def image_generations( # Use asyncio.to_thread for the requests.post call r = await asyncio.to_thread( requests.post, - url=f"{request.app.state.config.IMAGES_GEMINI_API_BASE_URL}/models/{model}:predict?key={api_key}", + url=f"{request.app.state.config.IMAGES_GEMINI_API_BASE_URL}/models/{model}:predict", json=data, headers=headers, ) From 5639ba423bfaa5340bb7b26e56cb22d341f13be5 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 18 Feb 2025 18:47:56 -0600 Subject: [PATCH 47/96] Fix "Cannot read properties of undefined (reading 'startsWith')" --- src/lib/components/chat/Messages/Markdown/Source.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/chat/Messages/Markdown/Source.svelte b/src/lib/components/chat/Messages/Markdown/Source.svelte index 4eb1fffb7..b7c7513ae 100644 --- a/src/lib/components/chat/Messages/Markdown/Source.svelte +++ 
b/src/lib/components/chat/Messages/Markdown/Source.svelte @@ -2,7 +2,7 @@ export let token; export let onClick: Function = () => {}; - let attributes: Record = {}; + let attributes: Record = {}; function extractAttributes(input: string): Record { const regex = /(\w+)="([^"]*)"/g; @@ -42,6 +42,6 @@ }} > - {formattedTitle(attributes.title)} + {attributes.title ? formattedTitle(attributes.title) : ''} From 7837843f829941435be03da712b669189ad72ad0 Mon Sep 17 00:00:00 2001 From: = Date: Tue, 18 Feb 2025 21:16:54 -0600 Subject: [PATCH 48/96] Add redirect capability This feature allows the authentication process to redirect to a route passed in the querystring. This allows the /auth route a means of bringing the user to an expected route instead of the main page (root). --- src/routes/auth/+page.svelte | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/routes/auth/+page.svelte b/src/routes/auth/+page.svelte index 60431bcec..02746d26e 100644 --- a/src/routes/auth/+page.svelte +++ b/src/routes/auth/+page.svelte @@ -28,6 +28,12 @@ let ldapUsername = ''; + const querystringValue = (key) => { + const querystring = window.location.search; + const urlParams = new URLSearchParams(querystring); + return urlParams.get(key); + }; + const setSessionUser = async (sessionUser) => { if (sessionUser) { console.log(sessionUser); @@ -39,7 +45,9 @@ $socket.emit('user-join', { auth: { token: sessionUser.token } }); await user.set(sessionUser); await config.set(await getBackendConfig()); - goto('/'); + + const redirectPath = querystringValue('redirect') || '/'; + goto(redirectPath); } }; From 4ef7aff66304f98c871ed88214162c22f1c682f4 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 19:35:22 -0800 Subject: [PATCH 49/96] refac --- backend/open_webui/retrieval/utils.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 
e5ba55878..59490f37f 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -138,7 +138,7 @@ def query_doc_with_hybrid_search( def merge_and_sort_query_results( - query_results: list[dict], k: int, reverse: bool = False + query_results: list[dict], k: int, reverse: bool = False ) -> list[dict]: # Initialize lists to store combined data combined_distances = [] @@ -151,10 +151,17 @@ def merge_and_sort_query_results( combined_documents.extend(data["documents"][0]) combined_metadatas.extend(data["metadatas"][0]) # DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals - combined_ids.extend([id + meta["file_id"] for id, meta in zip(data["ids"][0], data["metadatas"][0])]) + combined_ids.extend( + [ + f"{id}-{meta['file_id']}" + for id, meta in zip(data["ids"][0], data["metadatas"][0]) + ] + ) # Create a list of tuples (distance, document, metadata, ids) - combined = list(zip(combined_distances, combined_documents, combined_metadatas, combined_ids)) + combined = list( + zip(combined_distances, combined_documents, combined_metadatas, combined_ids) + ) # Sort the list based on distances combined.sort(key=lambda x: x[0], reverse=reverse) From 32432d6df16ce50ee38dcae4039b4752227989e9 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 20:00:24 -0800 Subject: [PATCH 50/96] fix: do NOT update changelog --- CHANGELOG.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 358fda15b..a61d81f46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,12 +5,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [0.5.15] - -### Added - -- **🌍 Enhanced Internationalization (i18n)**: Improved right-to-left languages experience with automatic text direction handling in chat and sidebar - ## [0.5.14] - 2025-02-17 ### Fixed From 9380fcd09a7ed037c99cd22d4bfcdcd352b0e0bd Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 20:01:24 -0800 Subject: [PATCH 51/96] refac --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 74b0facac..730cdfdc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,9 @@ dependencies = [ "googleapis-common-protos==1.63.2", "google-cloud-storage==2.19.0", + "azure-identity==1.20.0", + "azure-storage-blob==12.24.1", + "ldap3==2.9.1", "gcp-storage-emulator>=2024.8.3", ] From 2ee09d9f7dbdb908f23c435d3b581035d92047f0 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 20:07:59 -0800 Subject: [PATCH 52/96] fix: prompt suggestions settings --- src/lib/components/admin/Settings/Interface.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/admin/Settings/Interface.svelte b/src/lib/components/admin/Settings/Interface.svelte index 8b2a310ef..e3542475e 100644 --- a/src/lib/components/admin/Settings/Interface.svelte +++ b/src/lib/components/admin/Settings/Interface.svelte @@ -51,7 +51,7 @@ onMount(async () => { taskConfig = await getTaskConfig(localStorage.token); - promptSuggestions = $config?.default_prompt_suggestions; + promptSuggestions = $config?.default_prompt_suggestions ?? 
[]; banners = await getBanners(localStorage.token); }); From 003968f06a260ab1ac7418c2d30bd85ed4ba2747 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 20:15:16 -0800 Subject: [PATCH 53/96] enh: jupyter matplotlib support --- .../components/chat/Messages/CodeBlock.svelte | 99 ++++++++++++++++++- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/src/lib/components/chat/Messages/CodeBlock.svelte b/src/lib/components/chat/Messages/CodeBlock.svelte index c4614d52b..59f58d2e3 100644 --- a/src/lib/components/chat/Messages/CodeBlock.svelte +++ b/src/lib/components/chat/Messages/CodeBlock.svelte @@ -130,9 +130,65 @@ }); if (output) { - stdout = output.stdout; - stderr = output.stderr; - result = output.result; + if (output['stdout']) { + stdout = output['stdout']; + const stdoutLines = stdout.split('\n'); + + for (const [idx, line] of stdoutLines.entries()) { + if (line.startsWith('data:image/png;base64')) { + if (files) { + files.push({ + type: 'image/png', + data: line + }); + } else { + files = [ + { + type: 'image/png', + data: line + } + ]; + } + + if (stdout.startsWith(`${line}\n`)) { + stdout = stdout.replace(`${line}\n`, ``); + } else if (stdout.startsWith(`${line}`)) { + stdout = stdout.replace(`${line}`, ``); + } + } + } + } + + if (output['result']) { + result = output['result']; + const resultLines = result.split('\n'); + + for (const [idx, line] of resultLines.entries()) { + if (line.startsWith('data:image/png;base64')) { + if (files) { + files.push({ + type: 'image/png', + data: line + }); + } else { + files = [ + { + type: 'image/png', + data: line + } + ]; + } + + if (result.startsWith(`${line}\n`)) { + result = result.replace(`${line}\n`, ``); + } else if (result.startsWith(`${line}`)) { + result = result.replace(`${line}`, ``); + } + } + } + } + + output['stderr'] && (stderr = output['stderr']); } } else { executePythonAsWorker(code); @@ -205,7 +261,40 @@ ]; } - stdout = stdout.replace(`${line}\n`, ``); + if 
(stdout.startsWith(`${line}\n`)) { + stdout = stdout.replace(`${line}\n`, ``); + } else if (stdout.startsWith(`${line}`)) { + stdout = stdout.replace(`${line}`, ``); + } + } + } + } + + if (data['result']) { + result = data['result']; + const resultLines = result.split('\n'); + + for (const [idx, line] of resultLines.entries()) { + if (line.startsWith('data:image/png;base64')) { + if (files) { + files.push({ + type: 'image/png', + data: line + }); + } else { + files = [ + { + type: 'image/png', + data: line + } + ]; + } + + if (result.startsWith(`${line}\n`)) { + result = result.replace(`${line}\n`, ``); + } else if (result.startsWith(`${line}`)) { + result = result.replace(`${line}`, ``); + } } } } @@ -391,7 +480,7 @@ class="bg-gray-50 dark:bg-[#202123] dark:text-white max-w-full overflow-x-auto scrollbar-hidden" /> - {#if executing || stdout || stderr || result} + {#if executing || stdout || stderr || result || files}
    From a6a7c548d54e0bb5bd2435fcc19293e03438ab16 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 20:24:04 -0800 Subject: [PATCH 54/96] refac: code block styling --- src/app.css | 2 +- src/lib/components/chat/Messages/CodeBlock.svelte | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/app.css b/src/app.css index d324175b5..8bdc6f1ad 100644 --- a/src/app.css +++ b/src/app.css @@ -101,7 +101,7 @@ li p { /* Dark theme scrollbar styles */ .dark ::-webkit-scrollbar-thumb { - background-color: rgba(33, 33, 33, 0.8); /* Darker color for dark theme */ + background-color: rgba(42, 42, 42, 0.8); /* Darker color for dark theme */ border-color: rgba(0, 0, 0, var(--tw-border-opacity)); } diff --git a/src/lib/components/chat/Messages/CodeBlock.svelte b/src/lib/components/chat/Messages/CodeBlock.svelte index 59f58d2e3..7d318dc79 100644 --- a/src/lib/components/chat/Messages/CodeBlock.svelte +++ b/src/lib/components/chat/Messages/CodeBlock.svelte @@ -493,7 +493,13 @@ {#if stdout || stderr}
    STDOUT/STDERR
    -
    {stdout || stderr}
    +
    + {stdout || stderr} +
    {/if} {#if result || files} From 81715f6553be7968e454fb8125c27b9e7bf4c9aa Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 21:14:58 -0800 Subject: [PATCH 55/96] enh: RAG full context mode --- backend/open_webui/config.py | 10 +- backend/open_webui/main.py | 3 + backend/open_webui/retrieval/utils.py | 112 ++++++++++++++---- backend/open_webui/routers/retrieval.py | 11 +- backend/open_webui/utils/middleware.py | 4 +- .../admin/Settings/Documents.svelte | 19 ++- 6 files changed, 127 insertions(+), 32 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 714c4486c..6e5fb8de6 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1578,6 +1578,12 @@ ENABLE_RAG_HYBRID_SEARCH = PersistentConfig( os.environ.get("ENABLE_RAG_HYBRID_SEARCH", "").lower() == "true", ) +RAG_FULL_CONTEXT = PersistentConfig( + "RAG_FULL_CONTEXT", + "rag.full_context", + os.getenv("RAG_FULL_CONTEXT", "False").lower() == "true", +) + RAG_FILE_MAX_COUNT = PersistentConfig( "RAG_FILE_MAX_COUNT", "rag.file.max_count", @@ -1929,7 +1935,7 @@ RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( RAG_WEB_LOADER_ENGINE = PersistentConfig( "RAG_WEB_LOADER_ENGINE", "rag.web.loader.engine", - os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web") + os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web"), ) RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( @@ -1941,7 +1947,7 @@ RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( PLAYWRIGHT_WS_URI = PersistentConfig( "PLAYWRIGHT_WS_URI", "rag.web.loader.engine.playwright.ws.uri", - os.environ.get("PLAYWRIGHT_WS_URI", None) + os.environ.get("PLAYWRIGHT_WS_URI", None), ) #################################### diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 4eafbb533..5cad2ac27 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -156,6 +156,7 @@ from open_webui.config import ( # Retrieval RAG_TEMPLATE, DEFAULT_RAG_TEMPLATE, + 
RAG_FULL_CONTEXT, RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE, @@ -519,6 +520,8 @@ app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD app.state.config.FILE_MAX_SIZE = RAG_FILE_MAX_SIZE app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT + +app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 59490f37f..887d6e02a 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -84,6 +84,19 @@ def query_doc( raise e +def get_doc(collection_name: str, user: UserModel = None): + try: + result = VECTOR_DB_CLIENT.get(collection_name=collection_name) + + if result: + log.info(f"query_doc:result {result.ids} {result.metadatas}") + + return result + except Exception as e: + print(e) + raise e + + def query_doc_with_hybrid_search( collection_name: str, query: str, @@ -137,6 +150,24 @@ def query_doc_with_hybrid_search( raise e +def merge_get_results(get_results: list[dict]) -> dict: + # Initialize lists to store combined data + combined_documents = [] + combined_metadatas = [] + + for data in get_results: + combined_documents.extend(data["documents"][0]) + combined_metadatas.extend(data["metadatas"][0]) + + # Create the output dictionary + result = { + "documents": [combined_documents], + "metadatas": [combined_metadatas], + } + + return result + + def merge_and_sort_query_results( query_results: list[dict], k: int, reverse: bool = False ) -> list[dict]: @@ -194,6 +225,23 @@ def merge_and_sort_query_results( return result +def get_all_items_from_collections(collection_names: list[str]) -> dict: + results = [] + + for collection_name in collection_names: + if collection_name: + try: + result = 
get_doc(collection_name=collection_name) + if result is not None: + results.append(result.model_dump()) + except Exception as e: + log.exception(f"Error when querying the collection: {e}") + else: + pass + + return merge_get_results(results) + + def query_collection( collection_names: list[str], queries: list[str], @@ -311,8 +359,11 @@ def get_sources_from_files( reranking_function, r, hybrid_search, + full_context=False, ): - log.debug(f"files: {files} {queries} {embedding_function} {reranking_function}") + log.debug( + f"files: {files} {queries} {embedding_function} {reranking_function} {full_context}" + ) extracted_collections = [] relevant_contexts = [] @@ -350,36 +401,45 @@ def get_sources_from_files( log.debug(f"skipping {file} as it has already been extracted") continue - try: - context = None - if file.get("type") == "text": - context = file["content"] - else: - if hybrid_search: - try: - context = query_collection_with_hybrid_search( + if full_context: + try: + context = get_all_items_from_collections(collection_names) + + print("context", context) + except Exception as e: + log.exception(e) + + else: + try: + context = None + if file.get("type") == "text": + context = file["content"] + else: + if hybrid_search: + try: + context = query_collection_with_hybrid_search( + collection_names=collection_names, + queries=queries, + embedding_function=embedding_function, + k=k, + reranking_function=reranking_function, + r=r, + ) + except Exception as e: + log.debug( + "Error when using hybrid search, using" + " non hybrid search as fallback." + ) + + if (not hybrid_search) or (context is None): + context = query_collection( collection_names=collection_names, queries=queries, embedding_function=embedding_function, k=k, - reranking_function=reranking_function, - r=r, ) - except Exception as e: - log.debug( - "Error when using hybrid search, using" - " non hybrid search as fallback." 
- ) - - if (not hybrid_search) or (context is None): - context = query_collection( - collection_names=collection_names, - queries=queries, - embedding_function=embedding_function, - k=k, - ) - except Exception as e: - log.exception(e) + except Exception as e: + log.exception(e) extracted_collections.extend(collection_names) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 4f7d20fa9..e69d2ce96 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -351,6 +351,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): return { "status": True, "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, "enable_google_drive_integration": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, "content_extraction": { "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, @@ -463,6 +464,7 @@ class WebConfig(BaseModel): class ConfigUpdateForm(BaseModel): + RAG_FULL_CONTEXT: Optional[bool] = None pdf_extract_images: Optional[bool] = None enable_google_drive_integration: Optional[bool] = None file: Optional[FileConfig] = None @@ -482,6 +484,12 @@ async def update_rag_config( else request.app.state.config.PDF_EXTRACT_IMAGES ) + request.app.state.config.RAG_FULL_CONTEXT = ( + form_data.RAG_FULL_CONTEXT + if form_data.RAG_FULL_CONTEXT is not None + else request.app.state.config.RAG_FULL_CONTEXT + ) + request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ( form_data.enable_google_drive_integration if form_data.enable_google_drive_integration is not None @@ -588,6 +596,7 @@ async def update_rag_config( return { "status": True, "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, "file": { "max_size": request.app.state.config.FILE_MAX_SIZE, "max_count": request.app.state.config.FILE_MAX_COUNT, @@ 
-1379,7 +1388,7 @@ async def process_web_search( docs, collection_name, overwrite=True, - user=user + user=user, ) return { diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 073a019ed..b624f2a34 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -344,7 +344,7 @@ async def chat_web_search_handler( "query": searchQuery, } ), - user=user + user=user, ) if results: @@ -560,9 +560,9 @@ async def chat_completion_files_handler( reranking_function=request.app.state.rf, r=request.app.state.config.RELEVANCE_THRESHOLD, hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, + full_context=request.app.state.config.RAG_FULL_CONTEXT, ), ) - except Exception as e: log.exception(e) diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 917e924ae..c7c1f0e8f 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -27,7 +27,6 @@ import SensitiveInput from '$lib/components/common/SensitiveInput.svelte'; import Tooltip from '$lib/components/common/Tooltip.svelte'; import Switch from '$lib/components/common/Switch.svelte'; - import { text } from '@sveltejs/kit'; import Textarea from '$lib/components/common/Textarea.svelte'; const i18n = getContext('i18n'); @@ -56,6 +55,8 @@ let chunkOverlap = 0; let pdfExtractImages = true; + let RAG_FULL_CONTEXT = false; + let enableGoogleDriveIntegration = false; let OpenAIUrl = ''; @@ -182,6 +183,7 @@ max_size: fileMaxSize === '' ? null : fileMaxSize, max_count: fileMaxCount === '' ? 
null : fileMaxCount }, + RAG_FULL_CONTEXT: RAG_FULL_CONTEXT, chunk: { text_splitter: textSplitter, chunk_overlap: chunkOverlap, @@ -242,6 +244,8 @@ chunkSize = res.chunk.chunk_size; chunkOverlap = res.chunk.chunk_overlap; + RAG_FULL_CONTEXT = res.RAG_FULL_CONTEXT; + contentExtractionEngine = res.content_extraction.engine; tikaServerUrl = res.content_extraction.tika_server_url; showTikaServerUrl = contentExtractionEngine === 'tika'; @@ -388,6 +392,19 @@ {/if}
    + +
    +
    {$i18n.t('Full Context Mode')}
    +
    + + + +
    +

    From 5465cabd40ee40e600a7154ab5e1be7068930664 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 21:17:09 -0800 Subject: [PATCH 56/96] refac --- backend/open_webui/retrieval/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 887d6e02a..e39025978 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -404,8 +404,6 @@ def get_sources_from_files( if full_context: try: context = get_all_items_from_collections(collection_names) - - print("context", context) except Exception as e: log.exception(e) From 9ca47275730487351deca40fd7d31dc8600d92ca Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 21:29:27 -0800 Subject: [PATCH 57/96] enh: web search behaviour --- backend/open_webui/utils/middleware.py | 123 +++++++++++++------------ 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index b624f2a34..baa4d49a4 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -321,89 +321,94 @@ async def chat_web_search_handler( ) return form_data - searchQuery = queries[0] + all_results = [] - await event_emitter( - { - "type": "status", - "data": { - "action": "web_search", - "description": 'Searching "{{searchQuery}}"', - "query": searchQuery, - "done": False, - }, - } - ) - - try: - - results = await process_web_search( - request, - SearchForm( - **{ + for searchQuery in queries: + await event_emitter( + { + "type": "status", + "data": { + "action": "web_search", + "description": 'Searching "{{searchQuery}}"', "query": searchQuery, - } - ), - user=user, + "done": False, + }, + } ) - if results: - await event_emitter( - { - "type": "status", - "data": { - "action": "web_search", - "description": "Searched {{count}} sites", + try: + results = await 
process_web_search( + request, + SearchForm( + **{ "query": searchQuery, - "urls": results["filenames"], - "done": True, - }, - } + } + ), + user=user, ) - files = form_data.get("files", []) + if results: + all_results.append(results) + files = form_data.get("files", []) - if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT: - files.append( - { - "docs": results.get("docs", []), - "name": searchQuery, - "type": "web_search_docs", - "urls": results["filenames"], - } - ) - else: - files.append( - { - "collection_name": results["collection_name"], - "name": searchQuery, - "type": "web_search_results", - "urls": results["filenames"], - } - ) - form_data["files"] = files - else: + if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT: + files.append( + { + "docs": results.get("docs", []), + "name": searchQuery, + "type": "web_search_docs", + "urls": results["filenames"], + } + ) + else: + files.append( + { + "collection_name": results["collection_name"], + "name": searchQuery, + "type": "web_search_results", + "urls": results["filenames"], + } + ) + form_data["files"] = files + except Exception as e: + log.exception(e) await event_emitter( { "type": "status", "data": { "action": "web_search", - "description": "No search results found", + "description": 'Error searching "{{searchQuery}}"', "query": searchQuery, "done": True, "error": True, }, } ) - except Exception as e: - log.exception(e) + + if all_results: + urls = [] + for results in all_results: + if "filenames" in results: + urls.extend(results["filenames"]) + await event_emitter( { "type": "status", "data": { "action": "web_search", - "description": 'Error searching "{{searchQuery}}"', - "query": searchQuery, + "description": "Searched {{count}} sites", + "urls": urls, + "done": True, + }, + } + ) + else: + await event_emitter( + { + "type": "status", + "data": { + "action": "web_search", + "description": "No search results found", "done": True, "error": True, }, From 8f72f8def92b00023b58ad6c951e071dbc3658b8 
Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 23:44:54 -0800 Subject: [PATCH 58/96] fix: styling --- src/lib/components/chat/MessageInput.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index d18c7d4d2..5cde963ee 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -430,7 +430,7 @@ {/if} - {#if webSearchEnabled || ($settings?.webSearch ?? false) === 'always'} + {#if webSearchEnabled || ($config?.features?.enable_web_search && ($settings?.webSearch ?? false)) === 'always'}
    From c073b8b4eea0bf0b903012a12d1f6d434e467aa3 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 18 Feb 2025 23:49:27 -0800 Subject: [PATCH 59/96] refac --- backend/open_webui/retrieval/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index e39025978..5f181fba0 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -154,15 +154,18 @@ def merge_get_results(get_results: list[dict]) -> dict: # Initialize lists to store combined data combined_documents = [] combined_metadatas = [] + combined_ids = [] for data in get_results: combined_documents.extend(data["documents"][0]) combined_metadatas.extend(data["metadatas"][0]) + combined_ids.extend(data["ids"][0]) # Create the output dictionary result = { "documents": [combined_documents], "metadatas": [combined_metadatas], + "ids": [combined_ids], } return result From dab6396ebe79c9b32734b1eb047b9ee3b54365c7 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Wed, 19 Feb 2025 00:35:01 -0800 Subject: [PATCH 60/96] fix: inline citations --- src/lib/components/chat/Messages/Citations.svelte | 5 +++-- .../chat/Messages/Markdown/MarkdownInlineTokens.svelte | 2 +- .../components/chat/Messages/Markdown/Source.svelte | 3 ++- .../components/chat/Messages/ResponseMessage.svelte | 10 +++++----- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/lib/components/chat/Messages/Citations.svelte b/src/lib/components/chat/Messages/Citations.svelte index f8d57cbb7..095e29edf 100644 --- a/src/lib/components/chat/Messages/Citations.svelte +++ b/src/lib/components/chat/Messages/Citations.svelte @@ -7,6 +7,7 @@ const i18n = getContext('i18n'); + export let id = ''; export let sources = []; let citations = []; @@ -100,7 +101,7 @@
    {#each citations as citation, idx}