mirror of
https://git.mirrors.martin98.com/https://github.com/open-webui/open-webui
synced 2025-08-20 12:39:13 +08:00
feat: bypass web loader in web search
Co-Authored-By: Perry Li <peiyaoli@mail.nankai.edu.cn> Co-Authored-By: WilliamGates <3852641+williamgateszhao@users.noreply.github.com>
This commit is contained in:
parent
0b7f927983
commit
2eca6f6414
@ -2177,6 +2177,12 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
BYPASS_WEB_SEARCH_WEB_LOADER = PersistentConfig(
|
||||||
|
"BYPASS_WEB_SEARCH_WEB_LOADER",
|
||||||
|
"rag.web.search.bypass_web_loader",
|
||||||
|
os.getenv("BYPASS_WEB_SEARCH_WEB_LOADER", "False").lower() == "true",
|
||||||
|
)
|
||||||
|
|
||||||
WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
||||||
"WEB_SEARCH_RESULT_COUNT",
|
"WEB_SEARCH_RESULT_COUNT",
|
||||||
"rag.web.search.result_count",
|
"rag.web.search.result_count",
|
||||||
@ -2202,6 +2208,7 @@ WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
|
|||||||
int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
|
int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
WEB_LOADER_ENGINE = PersistentConfig(
|
WEB_LOADER_ENGINE = PersistentConfig(
|
||||||
"WEB_LOADER_ENGINE",
|
"WEB_LOADER_ENGINE",
|
||||||
"rag.web.loader.engine",
|
"rag.web.loader.engine",
|
||||||
|
@ -228,6 +228,7 @@ from open_webui.config import (
|
|||||||
ENABLE_WEB_SEARCH,
|
ENABLE_WEB_SEARCH,
|
||||||
WEB_SEARCH_ENGINE,
|
WEB_SEARCH_ENGINE,
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||||
|
BYPASS_WEB_SEARCH_WEB_LOADER,
|
||||||
WEB_SEARCH_RESULT_COUNT,
|
WEB_SEARCH_RESULT_COUNT,
|
||||||
WEB_SEARCH_CONCURRENT_REQUESTS,
|
WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
WEB_SEARCH_TRUST_ENV,
|
WEB_SEARCH_TRUST_ENV,
|
||||||
@ -707,6 +708,7 @@ app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV
|
|||||||
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||||
)
|
)
|
||||||
|
app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = BYPASS_WEB_SEARCH_WEB_LOADER
|
||||||
|
|
||||||
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
|
||||||
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION
|
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION
|
||||||
|
@ -387,6 +387,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||||
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||||
|
"BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
|
||||||
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
||||||
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
||||||
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
||||||
@ -439,6 +440,7 @@ class WebConfig(BaseModel):
|
|||||||
WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None
|
WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None
|
||||||
WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = []
|
WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = []
|
||||||
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
|
||||||
|
BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None
|
||||||
SEARXNG_QUERY_URL: Optional[str] = None
|
SEARXNG_QUERY_URL: Optional[str] = None
|
||||||
YACY_QUERY_URL: Optional[str] = None
|
YACY_QUERY_URL: Optional[str] = None
|
||||||
YACY_USERNAME: Optional[str] = None
|
YACY_USERNAME: Optional[str] = None
|
||||||
@ -751,6 +753,9 @@ async def update_rag_config(
|
|||||||
request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
|
||||||
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
|
||||||
)
|
)
|
||||||
|
request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER = (
|
||||||
|
form_data.web.BYPASS_WEB_SEARCH_WEB_LOADER
|
||||||
|
)
|
||||||
request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL
|
request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL
|
||||||
request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL
|
request.app.state.config.YACY_QUERY_URL = form_data.web.YACY_QUERY_URL
|
||||||
request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME
|
request.app.state.config.YACY_USERNAME = form_data.web.YACY_USERNAME
|
||||||
@ -875,6 +880,7 @@ async def update_rag_config(
|
|||||||
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||||
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
|
||||||
|
"BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
|
||||||
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
"SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL,
|
||||||
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
"YACY_QUERY_URL": request.app.state.config.YACY_QUERY_URL,
|
||||||
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
"YACY_USERNAME": request.app.state.config.YACY_USERNAME,
|
||||||
@ -1678,13 +1684,29 @@ async def process_web_search(
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loader = get_web_loader(
|
if request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER:
|
||||||
urls,
|
docs = [
|
||||||
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
|
Document(
|
||||||
requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
page_content=result.snippet,
|
||||||
trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
|
metadata={
|
||||||
)
|
"source": result.link,
|
||||||
docs = await loader.aload()
|
"title": result.title,
|
||||||
|
"snippet": result.snippet,
|
||||||
|
"link": result.link,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
for result in search_results
|
||||||
|
if hasattr(result, "snippet")
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
loader = get_web_loader(
|
||||||
|
urls,
|
||||||
|
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
|
trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
|
||||||
|
)
|
||||||
|
docs = await loader.aload()
|
||||||
|
|
||||||
urls = [
|
urls = [
|
||||||
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
|
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
|
||||||
] # only keep the urls returned by the loader
|
] # only keep the urls returned by the loader
|
||||||
|
@ -613,6 +613,19 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class=" mb-2.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">
|
||||||
|
<Tooltip content={$i18n.t('Bypass Web Loader')} placement="top-start">
|
||||||
|
{$i18n.t('Bypass Web Loader')}
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<Tooltip content={''}>
|
||||||
|
<Switch bind:state={webConfig.BYPASS_WEB_SEARCH_WEB_LOADER} />
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class=" mb-2.5 flex w-full justify-between">
|
<div class=" mb-2.5 flex w-full justify-between">
|
||||||
<div class=" self-center text-xs font-medium">
|
<div class=" self-center text-xs font-medium">
|
||||||
{$i18n.t('Trust Proxy Environment')}
|
{$i18n.t('Trust Proxy Environment')}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user