From aed11e72a60e920659f2bd33131bd71cb179046f Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:50:58 -0300
Subject: [PATCH] fix encoding if error

---
 apps/api/src/lib/LLM-extraction/helpers.ts | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/apps/api/src/lib/LLM-extraction/helpers.ts b/apps/api/src/lib/LLM-extraction/helpers.ts
index f47a6b3c..2143a32d 100644
--- a/apps/api/src/lib/LLM-extraction/helpers.ts
+++ b/apps/api/src/lib/LLM-extraction/helpers.ts
@@ -6,7 +6,13 @@ export function numTokensFromString(message: string, model: string): number {
   const encoder = encoding_for_model(model as TiktokenModel);
 
   // Encode the message into tokens
-  const tokens = encoder.encode(message);
+  let tokens: Uint32Array;
+  try {
+    tokens = encoder.encode(message);
+  } catch (error) {
+    message = message.replace("<|endoftext|>", "");
+    tokens = encoder.encode(message);
+  }
 
   // Free the encoder resources after use
   encoder.free();