Fix Bedrock system prompt (#2062)

### What problem does this PR solve?

Bugfix: usage of Bedrock models requires the system prompt (for models
that support it) to be passed to the API in a different way — at least
that was my experience with it today. This PR fixes it.


https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Ran Tavory 2024-08-23 06:44:37 +03:00 committed by GitHub
parent 89b05ad79f
commit 19396998eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -667,8 +667,6 @@ class BedrockChat(Base):
def chat(self, system, history, gen_conf):
from botocore.exceptions import ClientError
if system:
history.insert(0, {"role": "system", "content": system})
for k in list(gen_conf.keys()):
if k not in ["temperature", "top_p", "max_tokens"]:
del gen_conf[k]
@@ -688,7 +686,8 @@ class BedrockChat(Base):
response = self.client.converse(
modelId=self.model_name,
messages=history,
inferenceConfig=gen_conf
inferenceConfig=gen_conf,
system=[{"text": system}] if system else None,
)
# Extract and print the response text.
@@ -700,8 +699,6 @@ class BedrockChat(Base):
def chat_streamly(self, system, history, gen_conf):
from botocore.exceptions import ClientError
if system:
history.insert(0, {"role": "system", "content": system})
for k in list(gen_conf.keys()):
if k not in ["temperature", "top_p", "max_tokens"]:
del gen_conf[k]
@@ -720,7 +717,8 @@ class BedrockChat(Base):
response = self.client.converse(
modelId=self.model_name,
messages=history,
inferenceConfig=gen_conf
inferenceConfig=gen_conf,
system=[{"text": system}] if system else None,
)
ans = response["output"]["message"]["content"][0]["text"]
return ans, num_tokens_from_string(ans)