From e130ad74d147a6871df9a9f043a16b89577b2f75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikael=20Tur=C3=B8y?=
Date: Wed, 12 Jun 2024 22:37:35 +0200
Subject: [PATCH 01/34] Added timeout setting for ollama streaming response

---
 TROUBLESHOOTING.md          | 4 ++++
 backend/apps/ollama/main.py | 3 ++-
 backend/config.py           | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
index 8e8f89da0..d1a6dba28 100644
--- a/TROUBLESHOOTING.md
+++ b/TROUBLESHOOTING.md
@@ -18,6 +18,10 @@ If you're experiencing connection issues, it’s often due to the WebUI docker c
 docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
 ```
 
+### Error on Slow Responses for Ollama
+
+Open WebUI has a default timeout of 15 minutes (900 seconds) for Ollama to finish generating a response. If needed, this can be adjusted via the environment variable `OLLAMA_GENERATE_TIMEOUT`, which sets the timeout in seconds.
+
 ### General Connection Errors
 
 **Ensure Ollama Version is Up-to-Date**: Always start by checking that you have the latest version of Ollama. Visit [Ollama's official site](https://ollama.com/) for the latest updates.
diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index 144755418..f9a906986 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -46,6 +46,7 @@ from config import (
     SRC_LOG_LEVELS,
     OLLAMA_BASE_URLS,
     ENABLE_OLLAMA_API,
+    OLLAMA_GENERATE_TIMEOUT,
     ENABLE_MODEL_FILTER,
     MODEL_FILTER_LIST,
     UPLOAD_DIR,
@@ -154,7 +155,7 @@ async def cleanup_response(
 async def post_streaming_url(url: str, payload: str):
     r = None
     try:
-        session = aiohttp.ClientSession(trust_env=True)
+        session = aiohttp.ClientSession(trust_env=True, timeout=aiohttp.ClientTimeout(total=OLLAMA_GENERATE_TIMEOUT))
 
         r = await session.post(url, data=payload)
         r.raise_for_status()
diff --git a/backend/config.py b/backend/config.py
index 30a23f29e..995a48a01 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -425,6 +425,7 @@ OLLAMA_API_BASE_URL = os.environ.get(
 )
 
 OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
+OLLAMA_GENERATE_TIMEOUT = int(os.environ.get("OLLAMA_GENERATE_TIMEOUT", "900"))
 
 K8S_FLAG = os.environ.get("K8S_FLAG", "")
 USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
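The net effect of patch 01: aiohttp's `ClientTimeout(total=...)` caps the entire request, including the time spent reading the streamed body, and raises `asyncio.TimeoutError` once the budget is exhausted. A minimal, self-contained sketch of the same pattern (the endpoint URL and payload below are illustrative placeholders, not part of the patch):

```python
import asyncio
import os

import aiohttp

# Mirrors the patch: timeout in seconds, defaulting to 900 (15 minutes).
OLLAMA_GENERATE_TIMEOUT = int(os.environ.get("OLLAMA_GENERATE_TIMEOUT", "900"))


async def post_streaming(url: str, payload: str) -> None:
    # ClientTimeout(total=...) bounds the whole request, including the time
    # spent consuming the streamed body; aiohttp raises asyncio.TimeoutError
    # once the budget runs out.
    timeout = aiohttp.ClientTimeout(total=OLLAMA_GENERATE_TIMEOUT)
    async with aiohttp.ClientSession(trust_env=True, timeout=timeout) as session:
        async with session.post(url, data=payload) as r:
            r.raise_for_status()
            async for chunk in r.content:
                print(chunk.decode(errors="replace"), end="")


if __name__ == "__main__":
    # Illustrative endpoint and payload only.
    asyncio.run(
        post_streaming(
            "http://127.0.0.1:11434/api/generate",
            '{"model": "llama3", "prompt": "Hello"}',
        )
    )
```

Raising `OLLAMA_GENERATE_TIMEOUT` (e.g. `OLLAMA_GENERATE_TIMEOUT=1800`) is the intended remedy when long generations hit the default 15-minute cap.
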
From 493e3068d806520b7fde713cd52740c236758a61 Mon Sep 17 00:00:00 2001
From: Peter De-Ath
Date: Thu, 13 Jun 2024 02:01:50 +0100
Subject: [PATCH 02/34] enh: ability to edit memories

---
 backend/apps/webui/models/memories.py       | 14 +++++
 backend/apps/webui/routers/memories.py      | 23 ++++++++
 src/lib/apis/memories/index.ts              | 31 +++++++++++
 .../Personalization/AddMemoryModal.svelte   | 52 +++++++++++++++++--
 .../Personalization/ManageModal.svelte      | 26 ++++++++--
 src/lib/i18n/locales/en-GB/translation.json |  2 +
 6 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/backend/apps/webui/models/memories.py b/backend/apps/webui/models/memories.py
index 70e5577e9..0266cc8b2 100644
--- a/backend/apps/webui/models/memories.py
+++ b/backend/apps/webui/models/memories.py
@@ -64,6 +64,20 @@ class MemoriesTable:
             return memory
         else:
             return None
+
+    def update_memory(
+        self,
+        id: str,
+        content: str,
+    ) -> Optional[MemoryModel]:
+        try:
+            memory = Memory.get(Memory.id == id)
+            memory.content = content
+            memory.updated_at = int(time.time())
+            memory.save()
+            return MemoryModel(**model_to_dict(memory))
+        except Exception:
+            return None
 
     def get_memories(self) -> List[MemoryModel]:
         try:
diff --git a/backend/apps/webui/routers/memories.py b/backend/apps/webui/routers/memories.py
index 6448ebe1e..927c28b46 100644
--- a/backend/apps/webui/routers/memories.py
+++ b/backend/apps/webui/routers/memories.py
@@ -43,6 +43,8 @@ async def get_memories(user=Depends(get_verified_user)):
 class AddMemoryForm(BaseModel):
     content: str
 
+class MemoryUpdateModel(BaseModel):
+    content: Optional[str] = None
 
 @router.post("/add", response_model=Optional[MemoryModel])
 async def add_memory(
@@ -62,6 +64,27 @@ async def add_memory(
     return memory
 
 
+@router.patch("/{memory_id}", response_model=Optional[MemoryModel])
+async def update_memory(
+    memory_id: str, request: Request, form_data: MemoryUpdateModel, user=Depends(get_verified_user)
+):
+    memory = Memories.update_memory(memory_id, form_data.content)
+    if memory is None:
+        raise HTTPException(status_code=404, detail="Memory not found")
+
+    if form_data.content is not None:
+        memory_embedding = request.app.state.EMBEDDING_FUNCTION(form_data.content)
+        collection = CHROMA_CLIENT.get_or_create_collection(name=f"user-memory-{user.id}")
+        collection.upsert(
+            documents=[form_data.content],
+            ids=[memory.id],
+            embeddings=[memory_embedding],
+            metadatas=[{"created_at": memory.created_at, "updated_at": memory.updated_at}],
+        )
+
+    return memory
+
+
 ############################
 # QueryMemory
 ############################
diff --git a/src/lib/apis/memories/index.ts b/src/lib/apis/memories/index.ts
index 6cbb89f14..cc4abb176 100644
--- a/src/lib/apis/memories/index.ts
+++ b/src/lib/apis/memories/index.ts
@@ -59,6 +59,37 @@ export const addNewMemory = async (token: string, content: string) => {
 	return res;
 };
 
+export const updateMemoryById = async (token: string, id: string, content: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/memories/${id}`, {
+		method: 'PATCH',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			content: content
+		})
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.log(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const queryMemory = async (token: string, content: string) => {
 	let error = null;
diff --git a/src/lib/components/chat/Settings/Personalization/AddMemoryModal.svelte b/src/lib/components/chat/Settings/Personalization/AddMemoryModal.svelte
index 445b7f667..ff9476308 100644
--- a/src/lib/components/chat/Settings/Personalization/AddMemoryModal.svelte
+++ b/src/lib/components/chat/Settings/Personalization/AddMemoryModal.svelte
@@ -2,21 +2,60 @@
 	import { createEventDispatcher, getContext } from 'svelte';
 
 	import Modal from '$lib/components/common/Modal.svelte';
-	import { addNewMemory } from '$lib/apis/memories';
+	import { addNewMemory, updateMemoryById } from '$lib/apis/memories';
 	import { toast } from 'svelte-sonner';
 
 	const dispatch = createEventDispatcher();
 
 	export let show;
+	export let memory = {};
+
+	let showUpdateBtn = false;
 
 	const i18n = getContext('i18n');
 
 	let loading = false;
 	let content = '';
+	let isMemoryLoaded = false;
+
+	$: {
+		if (memory && memory.id && !isMemoryLoaded) {
+			showUpdateBtn = true;
+			content = memory.content;
+			isMemoryLoaded = true;
+		}
+		if (!show) {
+			showUpdateBtn = false;
+			isMemoryLoaded = false;
+			memory = {};
+			content = '';
+		}
+	}
 
 	const submitHandler = async () => {
 		loading = true;
+		if (memory && memory.id) {
+			const res = await updateMemoryById(localStorage.token, memory.id, content).catch((error) => {
+				toast.error(error);
+				return null;
+			});
+
+			if (res) {
+				console.log(res);
+				toast.success('Memory updated successfully');
+				content = '';
+				show = false;
+				isMemoryLoaded = false;
+				memory = {};
+				dispatch('save');
+			}
+
+			loading = false;
+			return;
+		}
+
 		const res = await addNewMemory(localStorage.token, content).catch((error) => {
 			toast.error(error);
@@ -38,7 +77,9 @@
-<div>{$i18n.t('Add Memory')}</div>
+<div>
+	{memory.id ? $i18n.t('Edit Memory') : $i18n.t('Add Memory')}
+</div>
+
+ {#if !$settings.chatBubble}
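Taken together, patch 02 adds a `PATCH /memories/{memory_id}` endpoint that updates a memory's content, refreshes `updated_at`, and re-embeds the new text into the per-user Chroma collection. A hedged usage sketch of that HTTP contract (the base URL and token are placeholders; the app itself calls this through the `updateMemoryById` fetch helper above):

```python
import requests

API_BASE_URL = "http://localhost:8080/api/v1"  # assumed deployment URL
TOKEN = "YOUR_API_TOKEN"  # placeholder bearer token


def update_memory(memory_id: str, content: str) -> dict:
    # Mirrors updateMemoryById: PATCH the memory with a JSON body;
    # the server responds 404 if the memory id is unknown.
    r = requests.patch(
        f"{API_BASE_URL}/memories/{memory_id}",
        headers={"Authorization": f"Bearer {TOKEN}"},
        json={"content": content},
        timeout=30,
    )
    r.raise_for_status()
    return r.json()  # the updated memory, including its new updated_at
```
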
diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 15ac73f1b..830f315bc 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -436,7 +436,7 @@ export const removeEmojis = (str) => { export const extractSentences = (text) => { // Split the paragraph into sentences based on common punctuation marks - const sentences = text.split(/(?<=[.!?])/); + const sentences = text.split(/(?<=[.!?])\s+/); return sentences .map((sentence) => removeEmojis(sentence.trim())) From 5300d2c531af7a961aea740ea96893399724401d Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Thu, 13 Jun 2024 01:28:15 -0700 Subject: [PATCH 04/34] refac --- backend/main.py | 2 +- src/lib/components/chat/Chat.svelte | 1 + .../chat/MessageInput/CallOverlay.svelte | 704 +++++++++--------- 3 files changed, 362 insertions(+), 345 deletions(-) diff --git a/backend/main.py b/backend/main.py index 9de4d7111..235ed421e 100644 --- a/backend/main.py +++ b/backend/main.py @@ -887,7 +887,7 @@ async def generate_emoji(form_data: dict, user=Depends(get_verified_user)): model = app.state.MODELS[model_id] template = ''' -You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please avoid using generic or overly ambiguous emojis like "🤔", and instead, choose ones that vividly represent the speaker's mood or reaction. +You are a perceptive assistant skilled at interpreting emotions from a provided message. Your task is to reflect the speaker's likely facial expression through a fitting emoji. Prioritize using diverse facial expression emojis to convey the nuanced emotions expressed in the text. Please choose ones that vividly represent the speaker's mood or reaction. 
Message: """{{prompt}}""" ''' diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index aa1462ff4..44a221ba6 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -1209,6 +1209,7 @@ { + const lastIndex = mergedTexts.length - 1; + if (lastIndex >= 0) { + const previousText = mergedTexts[lastIndex]; + const wordCount = previousText.split(/\s+/).length; + if (wordCount < 2) { + mergedTexts[lastIndex] = previousText + ' ' + currentText; + } else { + mergedTexts.push(currentText); + } + } else { + mergedTexts.push(currentText); + } + return mergedTexts; + }, []); let currentUtterance = null; + let rmsLevel = 0; + let hasStartedSpeaking = false; let mediaRecorder; let audioChunks = []; - const MIN_DECIBELS = -45; - const VISUALIZER_BUFFER_LENGTH = 300; - - // Function to calculate the RMS level from time domain data - const calculateRMS = (data: Uint8Array) => { - let sumSquares = 0; - for (let i = 0; i < data.length; i++) { - const normalizedValue = (data[i] - 128) / 128; // Normalize the data - sumSquares += normalizedValue * normalizedValue; - } - return Math.sqrt(sumSquares / data.length); - }; - - const normalizeRMS = (rms) => { - rms = rms * 10; - const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more - const scaledRMS = Math.pow(rms, exp); - - // Scale between 0.01 (1%) and 1.0 (100%) - return Math.min(1.0, Math.max(0.01, scaledRMS)); - }; - - const analyseAudio = (stream) => { - const audioContext = new AudioContext(); - const audioStreamSource = audioContext.createMediaStreamSource(stream); - - const analyser = audioContext.createAnalyser(); - analyser.minDecibels = MIN_DECIBELS; - audioStreamSource.connect(analyser); - - const bufferLength = analyser.frequencyBinCount; - - const domainData = new Uint8Array(bufferLength); - const timeDomainData = new Uint8Array(analyser.fftSize); - - let lastSoundTime = Date.now(); - hasStartedSpeaking = false; - - const detectSound = () => { - const processFrame = () => { - if (!mediaRecorder || !$showCallOverlay) { - if (mediaRecorder) { - mediaRecorder.stop(); - } - - return; - } - analyser.getByteTimeDomainData(timeDomainData); - analyser.getByteFrequencyData(domainData); - - // Calculate RMS level from time domain data - rmsLevel = calculateRMS(timeDomainData); - - // Check if initial speech/noise has started - const hasSound = domainData.some((value) => value > 0); - if (hasSound) { - stopAllAudio(); - hasStartedSpeaking = true; - lastSoundTime = Date.now(); - } - - // Start silence detection only after initial speech/noise has been detected - if (hasStartedSpeaking) { - if (Date.now() - lastSoundTime > 2000) { - confirmed = true; - - if (mediaRecorder) { - mediaRecorder.stop(); - } - } - } - - window.requestAnimationFrame(processFrame); - }; - - window.requestAnimationFrame(processFrame); - }; - - detectSound(); - }; - - const stopAllAudio = () => { - if (currentUtterance) { - speechSynthesis.cancel(); - currentUtterance = null; - } - if (assistantAudio[assistantAudioIdx]) { - assistantAudio[assistantAudioIdx].pause(); - assistantAudio[assistantAudioIdx].currentTime = 0; - } - - const audioElement = document.getElementById('audioElement'); - - if (audioElement) { - audioElement.pause(); - audioElement.currentTime = 0; - } - assistantSpeaking = false; - }; - - const playAudio = (idx) => { - if ($showCallOverlay) { - return new Promise((res) => { - assistantAudioIdx = idx; - const audioElement = 
document.getElementById('audioElement'); - const audio = assistantAudio[idx]; - - if (audioElement) { - audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property - - audioElement.muted = true; - - audioElement - .play() - .then(() => { - audioElement.muted = false; - }) - .catch((error) => { - toast.error(error); - }); - - audioElement.onended = async (e) => { - await new Promise((r) => setTimeout(r, 300)); - - if (Object.keys(assistantAudio).length - 1 === idx) { - assistantSpeaking = false; - } - - res(e); - }; - } - }); - } else { - return Promise.resolve(); - } - }; - - const getOpenAISpeech = async (text) => { - const res = await synthesizeOpenAISpeech( - localStorage.token, - $settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice, - text - ).catch((error) => { - toast.error(error); - assistantSpeaking = false; - return null; - }); - - if (res) { - const blob = await res.blob(); - const blobUrl = URL.createObjectURL(blob); - const audio = new Audio(blobUrl); - assistantAudio = audio; - } - }; - - const transcribeHandler = async (audioBlob) => { - // Create a blob from the audio chunks - - await tick(); - const file = blobToFile(audioBlob, 'recording.wav'); - - const res = await transcribeAudio(localStorage.token, file).catch((error) => { - toast.error(error); - return null; - }); - - if (res) { - console.log(res.text); - - if (res.text !== '') { - const _responses = await submitPrompt(res.text, { _raw: true }); - console.log(_responses); - } - } - }; - - const assistantSpeakingHandler = async (content) => { - assistantSpeaking = true; - - if (modelId && ($settings?.showEmojiInCall ?? false)) { - console.log('Generating emoji'); - const res = await generateEmoji(localStorage.token, modelId, content, chatId).catch( - (error) => { - console.error(error); - return null; - } - ); - - if (res) { - console.log(res); - if (/\p{Extended_Pictographic}/u.test(res)) { - emoji = res.match(/\p{Extended_Pictographic}/gu)[0]; - } - } - } - - if (($config.audio.tts.engine ?? '') == '') { - let voices = []; - const getVoicesLoop = setInterval(async () => { - voices = await speechSynthesis.getVoices(); - if (voices.length > 0) { - clearInterval(getVoicesLoop); - - const voice = - voices - ?.filter( - (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice) - ) - ?.at(0) ?? undefined; - - currentUtterance = new SpeechSynthesisUtterance(content); - - if (voice) { - currentUtterance.voice = voice; - } - - speechSynthesis.speak(currentUtterance); - - currentUtterance.onend = async () => { - assistantSpeaking = false; - }; - } - }, 100); - } else if ($config.audio.tts.engine === 'openai') { - console.log('openai'); - - const sentences = extractSentences(content).reduce((mergedTexts, currentText) => { - const lastIndex = mergedTexts.length - 1; - if (lastIndex >= 0) { - const previousText = mergedTexts[lastIndex]; - const wordCount = previousText.split(/\s+/).length; - if (wordCount < 2) { - mergedTexts[lastIndex] = previousText + ' ' + currentText; - } else { - mergedTexts.push(currentText); - } - } else { - mergedTexts.push(currentText); - } - return mergedTexts; - }, []); - - console.log(sentences); - - let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately - - for (const [idx, sentence] of sentences.entries()) { - const res = await synthesizeOpenAISpeech( - localStorage.token, - $settings?.audio?.tts?.voice ?? 
$config?.audio?.tts?.voice, - sentence - ).catch((error) => { - toast.error(error); - - assistantSpeaking = false; - return null; - }); - - if (res) { - const blob = await res.blob(); - const blobUrl = URL.createObjectURL(blob); - const audio = new Audio(blobUrl); - assistantAudio[idx] = audio; - lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); - - if (idx === sentences.length - 1) { - lastPlayedAudioPromise.then(() => { - assistantSpeaking = false; - }); - } - } - } - } - }; - - const stopRecordingCallback = async (_continue = true) => { - if ($showCallOverlay) { - if (confirmed) { - loading = true; - emoji = null; - - if (cameraStream) { - const imageUrl = takeScreenshot(); - - files = [ - { - type: 'image', - url: imageUrl - } - ]; - } - - const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); - await transcribeHandler(audioBlob); - - confirmed = false; - loading = false; - } - audioChunks = []; - mediaRecorder = false; - - if (_continue) { - startRecording(); - } - } else { - audioChunks = []; - mediaRecorder = false; - } - }; - - const startRecording = async () => { - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - mediaRecorder = new MediaRecorder(stream); - mediaRecorder.onstart = () => { - console.log('Recording started'); - audioChunks = []; - analyseAudio(stream); - }; - mediaRecorder.ondataavailable = (event) => { - if (hasStartedSpeaking) { - audioChunks.push(event.data); - } - }; - mediaRecorder.onstop = async () => { - console.log('Recording stopped'); - - await stopRecordingCallback(); - }; - mediaRecorder.start(); - }; + $: console.log('hasStartedSpeaking', hasStartedSpeaking); let videoInputDevices = []; let selectedVideoInputDeviceId = null; @@ -475,6 +168,286 @@ camera = false; }; + const MIN_DECIBELS = -45; + const VISUALIZER_BUFFER_LENGTH = 300; + + // Function to calculate the RMS level from time domain data + const calculateRMS = (data: Uint8Array) => { + let sumSquares = 0; + for (let i = 0; i < data.length; i++) { + const normalizedValue = (data[i] - 128) / 128; // Normalize the data + sumSquares += normalizedValue * normalizedValue; + } + return Math.sqrt(sumSquares / data.length); + }; + + const analyseAudio = (stream) => { + const audioContext = new AudioContext(); + const audioStreamSource = audioContext.createMediaStreamSource(stream); + + const analyser = audioContext.createAnalyser(); + analyser.minDecibels = MIN_DECIBELS; + audioStreamSource.connect(analyser); + + const bufferLength = analyser.frequencyBinCount; + + const domainData = new Uint8Array(bufferLength); + const timeDomainData = new Uint8Array(analyser.fftSize); + + let lastSoundTime = Date.now(); + hasStartedSpeaking = false; + + const detectSound = () => { + const processFrame = () => { + if (!mediaRecorder || !$showCallOverlay) { + return; + } + + analyser.getByteTimeDomainData(timeDomainData); + analyser.getByteFrequencyData(domainData); + + // Calculate RMS level from time domain data + rmsLevel = calculateRMS(timeDomainData); + + // Check if initial speech/noise has started + const hasSound = domainData.some((value) => value > 0); + if (hasSound) { + hasStartedSpeaking = true; + lastSoundTime = Date.now(); + + // BIG RED TEXT + console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected'); + stopAllAudio(); + } + + // Start silence detection only after initial speech/noise has been detected + if (hasStartedSpeaking) { + if (Date.now() - lastSoundTime > 2000) { + confirmed = true; + + if (mediaRecorder) { + 
mediaRecorder.stop(); + } + } + } + + window.requestAnimationFrame(processFrame); + }; + + window.requestAnimationFrame(processFrame); + }; + + detectSound(); + }; + + const transcribeHandler = async (audioBlob) => { + // Create a blob from the audio chunks + + await tick(); + const file = blobToFile(audioBlob, 'recording.wav'); + + const res = await transcribeAudio(localStorage.token, file).catch((error) => { + toast.error(error); + return null; + }); + + if (res) { + console.log(res.text); + + if (res.text !== '') { + const _responses = await submitPrompt(res.text, { _raw: true }); + console.log(_responses); + } + } + }; + + const stopAllAudio = async () => { + interrupted = true; + + if (chatStreaming) { + stopResponse(); + } + + if (currentUtterance) { + speechSynthesis.cancel(); + currentUtterance = null; + } + + await tick(); + audioQueue = []; + await tick(); + + const audioElement = document.getElementById('audioElement'); + if (audioElement) { + audioElement.pause(); + audioElement.currentTime = 0; + } + + assistantSpeaking = false; + }; + + const speakSpeechSynthesisHandler = (content) => { + if ($showCallOverlay) { + return new Promise((resolve) => { + let voices = []; + const getVoicesLoop = setInterval(async () => { + voices = await speechSynthesis.getVoices(); + if (voices.length > 0) { + clearInterval(getVoicesLoop); + + const voice = + voices + ?.filter( + (v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice) + ) + ?.at(0) ?? undefined; + + currentUtterance = new SpeechSynthesisUtterance(content); + + if (voice) { + currentUtterance.voice = voice; + } + + speechSynthesis.speak(currentUtterance); + currentUtterance.onend = async (e) => { + await new Promise((r) => setTimeout(r, 100)); + resolve(e); + }; + } + }, 100); + }); + } else { + return Promise.resolve(); + } + }; + + const playAudio = (audio) => { + if ($showCallOverlay) { + return new Promise((resolve) => { + const audioElement = document.getElementById('audioElement'); + + if (audioElement) { + audioElement.src = audio.src; + audioElement.muted = true; + + audioElement + .play() + .then(() => { + audioElement.muted = false; + }) + .catch((error) => { + console.error(error); + }); + + audioElement.onended = async (e) => { + await new Promise((r) => setTimeout(r, 100)); + resolve(e); + }; + } + }); + } else { + return Promise.resolve(); + } + }; + + const playAudioHandler = async () => { + console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0); + if (!assistantSpeaking && !interrupted && audioQueue.length > 0) { + assistantSpeaking = true; + const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing. + audioQueue = audioQueue; + await playAudio(audioToPlay); + assistantSpeaking = false; + } + }; + + const setContentAudio = async (content, idx) => { + if (assistantSentenceAudios[idx] === undefined) { + console.log('%c%s', 'color: red; font-size: 20px;', content); + + assistantSentenceAudios[idx] = null; + const res = await synthesizeOpenAISpeech( + localStorage.token, + $settings?.audio?.tts?.voice ?? 
$config?.audio?.tts?.voice, + content + ).catch((error) => { + toast.error(error); + assistantSpeaking = false; + return null; + }); + + if (res) { + const blob = await res.blob(); + const blobUrl = URL.createObjectURL(blob); + const audio = new Audio(blobUrl); + assistantSentenceAudios[idx] = audio; + audioQueue.push(audio); + audioQueue = audioQueue; + } + } + }; + + const stopRecordingCallback = async (_continue = true) => { + console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨'); + + if ($showCallOverlay) { + // deep copy the audioChunks array + const _audioChunks = audioChunks.slice(0); + + audioChunks = []; + mediaRecorder = false; + + if (_continue) { + startRecording(); + } + + if (confirmed) { + loading = true; + emoji = null; + + if (cameraStream) { + const imageUrl = takeScreenshot(); + + files = [ + { + type: 'image', + url: imageUrl + } + ]; + } + + const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' }); + await transcribeHandler(audioBlob); + + confirmed = false; + loading = false; + } + } else { + audioChunks = []; + mediaRecorder = false; + } + }; + + const startRecording = async () => { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + mediaRecorder = new MediaRecorder(stream); + mediaRecorder.onstart = () => { + console.log('Recording started'); + audioChunks = []; + analyseAudio(stream); + }; + mediaRecorder.ondataavailable = (event) => { + if (hasStartedSpeaking) { + audioChunks.push(event.data); + } + }; + mediaRecorder.onstop = async () => { + console.log('Recording stopped'); + await stopRecordingCallback(); + }; + mediaRecorder.start(); + }; + $: if ($showCallOverlay) { startRecording(); } else { @@ -483,30 +456,73 @@ stopRecordingCallback(false); } + $: { + if (audioQueue.length > 0 && !assistantSpeaking) { + playAudioHandler(); + } + } + onMount(() => { console.log(eventTarget); eventTarget.addEventListener('chat:start', async (e) => { - console.log('Chat start event:', e.detail); - message = ''; + console.log('Chat start event:', e); + interrupted = false; + + assistantMessage = ''; + assistantSentenceIdx = -1; + assistantSentenceAudios = {}; // Reset audio tracking + audioQueue = []; // Clear the audio queue + + chatStreaming = true; }); eventTarget.addEventListener('chat', async (e) => { const { content } = e.detail; + assistantMessage += content; + await tick(); - message += content; - console.log('Chat event:', message); + if (!interrupted) { + if ($config.audio.tts.engine !== '') { + assistantSentenceIdx = assistantSentences.length - 2; + + if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) { + await tick(); + setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx); + } + } + } + + chatStreaming = true; }); eventTarget.addEventListener('chat:finish', async (e) => { - console.log('Chat finish event:', e.detail); - message = ''; + chatStreaming = false; + loading = false; + + console.log('Chat finish event:', e); + await tick(); + + if (!interrupted) { + if ($config.audio.tts.engine !== '') { + for (const [idx, sentence] of assistantSentences.entries()) { + if (!assistantSentenceAudios[idx]) { + await tick(); + setContentAudio(sentence, idx); + } + } + } else { + emoji = generateEmoji(localStorage.token, modelId, assistantMessage); + speakSpeechSynthesisHandler(assistantMessage); + } + } }); }); +
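The silence-detection core carried over by this refactor is worth spelling out: `calculateRMS` normalises each byte from `getByteTimeDomainData` (0–255, with silence centred at 128) into the range -1..1 and takes the root mean square, and once speech has started, two seconds without detected sound sets `confirmed` and stops the recorder. A standalone sketch of the RMS step (Python for illustration; the component implements it in TypeScript):

```python
from math import sqrt


def calculate_rms(samples: list[int]) -> float:
    """RMS of 8-bit time-domain audio samples (0-255, midpoint 128).

    Mirrors calculateRMS in CallOverlay.svelte: normalise each sample
    to -1.0..1.0, average the squares, take the square root.
    """
    sum_squares = sum(((s - 128) / 128) ** 2 for s in samples)
    return sqrt(sum_squares / len(samples))


# Pure silence (all samples at the 128 midpoint) gives 0.0;
# a full-scale square wave approaches 1.0.
assert calculate_rms([128, 128, 128, 128]) == 0.0
assert 0.99 < calculate_rms([0, 255] * 100) < 1.0
```
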