diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index e0bed082b..950a379cd 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1863,43 +1863,44 @@ DATALAB_MARKER_LANGS = PersistentConfig( DATALAB_MARKER_USE_LLM = PersistentConfig( "DATALAB_MARKER_USE_LLM", "rag.DATALAB_MARKER_USE_LLM", - os.environ.get("DATALAB_MARKER_USE_LLM", "false") == "true", + os.environ.get("DATALAB_MARKER_USE_LLM", "false").lower() == "true", ) DATALAB_MARKER_SKIP_CACHE = PersistentConfig( "DATALAB_MARKER_SKIP_CACHE", "rag.datalab_marker_skip_cache", - os.environ.get("DATALAB_MARKER_SKIP_CACHE", "false") == "true", + os.environ.get("DATALAB_MARKER_SKIP_CACHE", "false").lower() == "true", ) DATALAB_MARKER_FORCE_OCR = PersistentConfig( "DATALAB_MARKER_FORCE_OCR", "rag.datalab_marker_force_ocr", - os.environ.get("DATALAB_MARKER_FORCE_OCR", "false") == "true", + os.environ.get("DATALAB_MARKER_FORCE_OCR", "false").lower() == "true", ) DATALAB_MARKER_PAGINATE = PersistentConfig( "DATALAB_MARKER_PAGINATE", "rag.datalab_marker_paginate", - os.environ.get("DATALAB_MARKER_PAGINATE", "false") == "true", + os.environ.get("DATALAB_MARKER_PAGINATE", "false").lower() == "true", ) DATALAB_MARKER_STRIP_EXISTING_OCR = PersistentConfig( "DATALAB_MARKER_STRIP_EXISTING_OCR", "rag.datalab_marker_strip_existing_ocr", - os.environ.get("DATALAB_MARKER_STRIP_EXISTING_OCR", "false") == "true", + os.environ.get("DATALAB_MARKER_STRIP_EXISTING_OCR", "false").lower() == "true", ) DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig( "DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "rag.datalab_marker_disable_image_extraction", - os.environ.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "false") == "true", + os.environ.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "false").lower() + == "true", ) DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig( "DATALAB_MARKER_OUTPUT_FORMAT", "rag.datalab_marker_output_format", - os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", ""), + os.environ.get("DATALAB_MARKER_OUTPUT_FORMAT", "markdown"), ) EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig( diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index b5fb9b30e..f4f3202d7 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -58,27 +58,6 @@ }; let RAGConfig = null; - let selectedLanguages: string[] = ['en']; - let langsHydrated = false; - - const SUPPORTED_LANGUAGES = { - "af": "Afrikaans", "am": "Amharic", "ar": "Arabic", "as": "Assamese", "az": "Azerbaijani", "be": "Belarusian", - "bg": "Bulgarian", "bn": "Bengali", "br": "Breton", "bs": "Bosnian", "ca": "Catalan", "cs": "Czech", - "cy": "Welsh", "da": "Danish", "de": "German", "el": "Greek", "en": "English", "eo": "Esperanto", - "es": "Spanish", "et": "Estonian", "eu": "Basque", "fa": "Persian", "fi": "Finnish", "fr": "French", - "fy": "Western Frisian", "ga": "Irish", "gd": "Scottish Gaelic", "gl": "Galician", "gu": "Gujarati", - "ha": "Hausa", "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", "hu": "Hungarian", "hy": "Armenian", - "id": "Indonesian", "is": "Icelandic", "it": "Italian", "ja": "Japanese", "jv": "Javanese", "ka": "Georgian", - "kk": "Kazakh", "km": "Khmer", "kn": "Kannada", "ko": "Korean", "ku": "Kurdish", "ky": "Kyrgyz", - "la": "Latin", "lo": "Lao", "lt": "Lithuanian", "lv": "Latvian", "mg": "Malagasy", "mk": "Macedonian", - "ml": "Malayalam", "mn": "Mongolian", "mr": "Marathi", "ms": "Malay", "my": "Burmese", "ne": "Nepali", - "nl": "Dutch", "no": "Norwegian", "om": "Oromo", "or": "Oriya", "pa": "Punjabi", "pl": "Polish", - "ps": "Pashto", "pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "sa": "Sanskrit", "sd": "Sindhi", - "si": "Sinhala", "sk": "Slovak", "sl": "Slovenian", "so": "Somali", "sq": "Albanian", "sr": "Serbian", - "su": "Sundanese", "sv": "Swedish", "sw": "Swahili", "ta": "Tamil", "te": "Telugu", "th": "Thai", - "tl": "Tagalog", "tr": "Turkish", "ug": "Uyghur", "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek", - "vi": "Vietnamese", "xh": "Xhosa", "yi": "Yiddish", "zh": "Chinese", "_math": "Math" - }; const embeddingModelUpdateHandler = async () => { if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) { @@ -145,10 +124,6 @@ }; const submitHandler = async () => { - if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' && !RAGConfig.DATALAB_MARKER_API_KEY) { - toast.error($i18n.t('Datalab Marker API Key required.')); - return; - } if ( RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external' && RAGConfig.EXTERNAL_DOCUMENT_LOADER_URL === '' @@ -175,6 +150,14 @@ return; } + if ( + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker' && + !RAGConfig.DATALAB_MARKER_API_KEY + ) { + toast.error($i18n.t('Datalab Marker API Key required.')); + return; + } + if ( RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' && (RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' || @@ -200,6 +183,11 @@ .map((ext) => ext.trim()) .filter((ext) => ext !== ''); + RAGConfig.DATALAB_MARKER_LANGS = RAGConfig.DATALAB_MARKER_LANGS.split(',') + .map((code) => code.trim()) + .filter((code) => code !== '') + .join(', '); + const res = await updateRAGConfig(localStorage.token, RAGConfig); dispatch('save'); }; @@ -224,27 +212,8 @@ const config = await getRAGConfig(localStorage.token); config.ALLOWED_FILE_EXTENSIONS = (config?.ALLOWED_FILE_EXTENSIONS ?? []).join(', '); - - if (!config.DATALAB_MARKER_OUTPUT_FORMAT) { - config.DATALAB_MARKER_OUTPUT_FORMAT = 'markdown'; - } - - if (config.DATALAB_MARKER_LANGS) { - selectedLanguages = config.DATALAB_MARKER_LANGS - .split(',') - .map(code => code.trim()) - .filter(Boolean); - } - RAGConfig = config; - langsHydrated = true; }); - - $: if (langsHydrated && RAGConfig) { - RAGConfig.DATALAB_MARKER_LANGS = selectedLanguages.length - ? selectedLanguages.join(',') - : 'en'; - } - + @@ -336,106 +305,136 @@ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'datalab_marker'} -
- -
-
-
- - +
+
-
-
-
- - {$i18n.t('Use LLM')} - + +
+
+ {$i18n.t('Languages')} +
+ +
-
- + +
+
+ + {$i18n.t('Use LLM')} + +
+
+ +
+
+
+ + {$i18n.t('Skip Cache')} + +
+
+ +
-
-
- - {$i18n.t('Skip Cache')} - +
+
+ + {$i18n.t('Force OCR')} + +
+
+ +
-
- +
+
+ + {$i18n.t('Paginate')} + +
+
+ +
+
+
+ + {$i18n.t('Strip Existing OCR')} + +
+
+ +
-
-
- - {$i18n.t('Force OCR')} - -
-
- -
-
-
-
- - {$i18n.t('Paginate')} - -
-
- -
-
-
-
- - {$i18n.t('Strip Existing OCR')} - -
-
- -
-
-
-
- - {$i18n.t('Disable Image Extraction')} - -
-
- -
-
-
-
- - {$i18n.t('Output Format')} - -
-
- +
+
+ + {$i18n.t('Disable Image Extraction')} + +
+
+ +
+
+
+ + {$i18n.t('Output Format')} + +
+
+ +
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'external'}