feat: compress audio

Co-Authored-By: Beck Bekmyradov <47065940+bekmuradov@users.noreply.github.com>
This commit is contained in:
Timothy J. Baek 2024-09-30 00:30:12 +02:00
parent 8206c47a47
commit 7152af949b
3 changed files with 139 additions and 97 deletions

View File

@ -5,6 +5,8 @@ import os
import uuid import uuid
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from pydub import AudioSegment
from pydub.silence import split_on_silence
import requests import requests
from open_webui.config import ( from open_webui.config import (
@ -35,7 +37,12 @@ from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile,
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
from pydantic import BaseModel from pydantic import BaseModel
from open_webui.utils.utils import get_admin_user, get_current_user, get_verified_user from open_webui.utils.utils import get_admin_user, get_verified_user
# Upload size ceiling for transcription input. A saved upload whose on-disk
# size exceeds MAX_FILE_SIZE is re-encoded to a smaller file first and, if it
# is still over the limit, split into chunks that are transcribed one by one.
# NOTE(review): 25 MB presumably mirrors the upload limit of the external
# speech-to-text API - confirm before changing.
MAX_FILE_SIZE_MB = 25
# The same limit in bytes, for direct comparison with os.path.getsize().
MAX_FILE_SIZE = MAX_FILE_SIZE_MB * 1024**2
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["AUDIO"]) log.setLevel(SRC_LOG_LEVELS["AUDIO"])
@ -353,35 +360,11 @@ async def speech(request: Request, user=Depends(get_verified_user)):
) )
@app.post("/transcriptions") def transcribe(file_path):
def transcribe( print("transcribe", file_path)
file: UploadFile = File(...), filename = os.path.basename(file_path)
user=Depends(get_current_user), file_dir = os.path.dirname(file_path)
): id = filename.split(".")[0]
log.info(f"file.content_type: {file.content_type}")
if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
)
try:
ext = file.filename.split(".")[-1]
id = uuid.uuid4()
filename = f"{id}.{ext}"
file_dir = f"{CACHE_DIR}/audio/transcriptions"
os.makedirs(file_dir, exist_ok=True)
file_path = f"{file_dir}/{filename}"
print(filename)
contents = file.file.read()
with open(file_path, "wb") as f:
f.write(contents)
f.close()
if app.state.config.STT_ENGINE == "": if app.state.config.STT_ENGINE == "":
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
@ -421,9 +404,7 @@ def transcribe(
json.dump(data, f) json.dump(data, f)
print(data) print(data)
return data return data
elif app.state.config.STT_ENGINE == "openai": elif app.state.config.STT_ENGINE == "openai":
if is_mp4_audio(file_path): if is_mp4_audio(file_path):
print("is_mp4_audio") print("is_mp4_audio")
@ -469,9 +450,70 @@ def transcribe(
except Exception: except Exception:
error_detail = f"External: {e}" error_detail = f"External: {e}"
raise error_detail
@app.post("/transcriptions")
def transcription(
file: UploadFile = File(...),
user=Depends(get_verified_user),
):
log.info(f"file.content_type: {file.content_type}")
if file.content_type not in ["audio/mpeg", "audio/wav", "audio/ogg", "audio/x-m4a"]:
raise HTTPException( raise HTTPException(
status_code=r.status_code if r != None else 500, status_code=status.HTTP_400_BAD_REQUEST,
detail=error_detail, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
)
try:
ext = file.filename.split(".")[-1]
id = uuid.uuid4()
filename = f"{id}.{ext}"
contents = file.file.read()
file_dir = f"{CACHE_DIR}/audio/transcriptions"
os.makedirs(file_dir, exist_ok=True)
file_path = f"{file_dir}/{filename}"
with open(file_path, "wb") as f:
f.write(contents)
try:
if os.path.getsize(file_path) > MAX_FILE_SIZE: # file is bigger than 25MB
log.debug(f"File size is larger than {MAX_FILE_SIZE_MB}MB")
audio = AudioSegment.from_file(file_path)
audio = audio.set_frame_rate(16000).set_channels(1) # Compress audio
compressed_path = f"{file_dir}/{id}_compressed.opus"
audio.export(compressed_path, format="opus", bitrate="32k")
log.debug(f"Compressed audio to {compressed_path}")
file_path = compressed_path
if (
os.path.getsize(file_path) > MAX_FILE_SIZE
): # Still larger than 25MB after compression
chunks = split_on_silence(
audio, min_silence_len=500, silence_thresh=-40
)
texts = []
for i, chunk in enumerate(chunks):
chunk_file_path = f"{file_dir}/{id}_chunk{i}.{ext}"
chunk.export(chunk_file_path, format=ext)
text = transcribe(chunk_file_path)
texts.append(text)
data = {"text": " ".join(texts)}
else:
data = transcribe(file_path)
else:
data = transcribe(file_path)
return data
except Exception as e:
log.exception(e)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e),
) )
except Exception as e: except Exception as e:

View File

@ -700,7 +700,7 @@
childrenIds: [], childrenIds: [],
role: 'user', role: 'user',
content: userPrompt, content: userPrompt,
files: chatFiles.length > 0 ? chatFiles : undefined, files: _files.length > 0 ? _files : undefined,
timestamp: Math.floor(Date.now() / 1000), // Unix epoch timestamp: Math.floor(Date.now() / 1000), // Unix epoch
models: selectedModels models: selectedModels
}; };

View File

@ -54,7 +54,7 @@
</div> </div>
<div> <div>
<div class="flex flex-col md:flex-row gap-1 justify-between w-full"> <div class="flex flex-col items-center md:flex-row gap-1 justify-between w-full">
<div class=" flex flex-wrap text-sm gap-1 text-gray-500"> <div class=" flex flex-wrap text-sm gap-1 text-gray-500">
{#if file.size} {#if file.size}
<div class="capitalize shrink-0">{formatFileSize(file.size)}</div> <div class="capitalize shrink-0">{formatFileSize(file.size)}</div>