feat: compress audio

Co-Authored-By: Beck Bekmyradov <47065940+bekmuradov@users.noreply.github.com>
2025-08-17 14:46:00 +08:00 · 2024-09-30 00:30:12 +02:00 · 2024-09-30 00:30:12 +02:00 · 7152af949b
commit 7152af949b
parent 8206c47a47
3 changed files with 139 additions and 97 deletions
--- a/backend/open_webui/apps/audio/main.py
+++ b/backend/open_webui/apps/audio/main.py
@ -5,6 +5,8 @@ import os
 import uuid
 from functools import lru_cache
 from pathlib import Path
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
 import requests
 from open_webui.config import (
@ -35,7 +37,12 @@ from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile,
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from pydantic import BaseModel
-from open_webui.utils.utils import get_admin_user, get_current_user, get_verified_user
+from open_webui.utils.utils import get_admin_user, get_verified_user
 # Constants
 MAX_FILE_SIZE_MB = 25
 MAX_FILE_SIZE = MAX_FILE_SIZE_MB * 1024 * 1024  # Convert MB to bytes
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["AUDIO"])
@ -353,35 +360,11 @@ async def speech(request: Request, user=Depends(get_verified_user)):
            )
-@app.post("/transcriptions")
+def transcribe(file_path):
-def transcribe(
+    print("transcribe", file_path)
-    file: UploadFile = File(...),
+    filename = os.path.basename(file_path)
-    user=Depends(get_current_user),
+    file_dir = os.path.dirname(file_path)
-):
+    id = filename.split(".")[0]
    log.info(f"file.content_type: {file.content_type}")
    if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
        )
    try:
        ext = file.filename.split(".")[-1]
        id = uuid.uuid4()
        filename = f"{id}.{ext}"
        file_dir = f"{CACHE_DIR}/audio/transcriptions"
        os.makedirs(file_dir, exist_ok=True)
        file_path = f"{file_dir}/{filename}"
        print(filename)
        contents = file.file.read()
        with open(file_path, "wb") as f:
            f.write(contents)
            f.close()
    if app.state.config.STT_ENGINE == "":
        from faster_whisper import WhisperModel
@ -421,9 +404,7 @@ def transcribe(
            json.dump(data, f)
        print(data)
        return data
    elif app.state.config.STT_ENGINE == "openai":
        if is_mp4_audio(file_path):
            print("is_mp4_audio")
@ -469,9 +450,70 @@ def transcribe(
                except Exception:
                    error_detail = f"External: {e}"
            raise error_detail
@app.post("/transcriptions")
 def transcription(
    file: UploadFile = File(...),
    user=Depends(get_verified_user),
 ):
    log.info(f"file.content_type: {file.content_type}")
    if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
        raise HTTPException(
-                    status_code=r.status_code if r != None else 500,
+            status_code=status.HTTP_400_BAD_REQUEST,
-                    detail=error_detail,
+            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
        )
    try:
        ext = file.filename.split(".")[-1]
        id = uuid.uuid4()
        filename = f"{id}.{ext}"
        contents = file.file.read()
        file_dir = f"{CACHE_DIR}/audio/transcriptions"
        os.makedirs(file_dir, exist_ok=True)
        file_path = f"{file_dir}/{filename}"
        with open(file_path, "wb") as f:
            f.write(contents)
        try:
            if os.path.getsize(file_path) > MAX_FILE_SIZE:  # file is bigger than 25MB
                log.debug(f"File size is larger than {MAX_FILE_SIZE_MB}MB")
                audio = AudioSegment.from_file(file_path)
                audio = audio.set_frame_rate(16000).set_channels(1)  # Compress audio
                compressed_path = f"{file_dir}/{id}_compressed.opus"
                audio.export(compressed_path, format="opus", bitrate="32k")
                log.debug(f"Compressed audio to {compressed_path}")
                file_path = compressed_path
                if (
                    os.path.getsize(file_path) > MAX_FILE_SIZE
                ):  # Still larger than 25MB after compression
                    chunks = split_on_silence(
                        audio, min_silence_len=500, silence_thresh=-40
                    )
                    texts = []
                    for i, chunk in enumerate(chunks):
                        chunk_file_path = f"{file_dir}/{id}_chunk{i}.{ext}"
                        chunk.export(chunk_file_path, format=ext)
                        text = transcribe(chunk_file_path)
                        texts.append(text)
                    data = {"text": " ".join(texts)}
                else:
                    data = transcribe(file_path)
            else:
                data = transcribe(file_path)
            return data
        except Exception as e:
            log.exception(e)
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT(e),
            )
    except Exception as e:
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@ -700,7 +700,7 @@
 				childrenIds: [],
 				role: 'user',
 				content: userPrompt,
-				files: chatFiles.length > 0 ? chatFiles : undefined,
+				files: _files.length > 0 ? _files : undefined,
 				timestamp: Math.floor(Date.now() / 1000), // Unix epoch
 				models: selectedModels
 			};
--- a/src/lib/components/common/FileItemModal.svelte
+++ b/src/lib/components/common/FileItemModal.svelte
@ -54,7 +54,7 @@
 			</div>
 			<div>
-				<div class="flex flex-col md:flex-row gap-1 justify-between w-full">
+				<div class="flex flex-col items-center md:flex-row gap-1 justify-between w-full">
 					<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
 						{#if file.size}
 							<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>