diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 11956ad81..f5b66872e 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -630,7 +630,7 @@ class BedrockChat(Base):
                 modelId=self.model_name,
                 messages=history,
                 inferenceConfig=gen_conf,
-                system=[{"text": system}] if system else None,
+                system=[{"text": (system if system else "Answer the user's message.")}],
             )

             # Extract and print the response text.
@@ -675,7 +675,8 @@ class BedrockChat(Base):
             streaming_response = self.client.converse_stream(
                 modelId=self.model_name,
                 messages=history,
-                inferenceConfig=gen_conf
+                inferenceConfig=gen_conf,
+                system=[{"text": system if system else "Answer the user's message."}],
             )

             # Extract and print the streamed response text in real-time.
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 4189a022f..39704590a 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -443,7 +443,7 @@ class BedrockEmbed(Base):

             response = self.client.invoke_model(modelId=self.model_name, body=json.dumps(body))
             model_response = json.loads(response["body"].read())
-            embeddings.extend([model_response["embedding"]])
+            embeddings.extend(model_response["embedding"])

         return np.array(embeddings), token_count
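
Why both calls now always pass `system`: boto3 validates request parameters against the service model, so `system=None` fails with a `ParamValidationError` before any request is sent; the parameter must either be omitted entirely or be a list of content blocks. The patch chooses the latter, with a non-empty fallback prompt, for both `converse` and `converse_stream`. A minimal sketch of the resulting call pattern, assuming a `bedrock-runtime` client; the function name and fallback prompt here are illustrative, not taken from the repo:

```python
import boto3

# Assumed client setup; region and credentials come from the environment.
client = boto3.client("bedrock-runtime")

def converse_with_fallback_system(model_name, history, gen_conf, system=None):
    # system=None is rejected by botocore's parameter validation
    # (it must be a list when present), so a non-empty text block
    # is always supplied instead, as the patch does.
    return client.converse(
        modelId=model_name,
        messages=history,
        inferenceConfig=gen_conf,
        system=[{"text": system if system else "Answer the user's message."}],
    )
```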
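
On the embedding change: `list.extend` splices in the items of its argument, so wrapping the vector in another list nested the result one level too deep. If this path encodes a single text, `np.array(embeddings)` should come out as a flat `(dim,)` vector, which the fix produces. A standalone illustration with made-up values:

```python
import numpy as np

# Toy response shaped like the parsed JSON body; the values are invented.
model_response = {"embedding": [0.1, 0.2, 0.3]}

wrapped, flat = [], []
wrapped.extend([model_response["embedding"]])  # appends the whole vector as one element
flat.extend(model_response["embedding"])       # splices the individual floats in

print(np.array(wrapped).shape)  # (1, 3): an extra nesting level
print(np.array(flat).shape)     # (3,): the flat vector the patch opts for
```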