diff --git a/conf/llm_factories.json b/conf/llm_factories.json index be1065c8e..44ebda550 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -104,6 +104,12 @@ "max_tokens": 2048, "model_type": "embedding" }, + { + "llm_name": "text-embedding-v3", + "tags": "TEXT EMBEDDING,2K", + "max_tokens": 2048, + "model_type": "embedding" + }, { "llm_name": "paraformer-realtime-8k-v1", "tags": "SPEECH2TEXT", @@ -131,13 +137,37 @@ "status": "1", "llm": [ { - "llm_name": "glm-3-turbo", + "llm_name": "glm-4", "tags": "LLM,CHAT,", "max_tokens": 128000, "model_type": "chat" }, { - "llm_name": "glm-4", + "llm_name": "glm-4-airx", + "tags": "LLM,CHAT,", + "max_tokens": 8000, + "model_type": "chat" + }, + { + "llm_name": "glm-4-air", + "tags": "LLM,CHAT,", + "max_tokens": 128000, + "model_type": "chat" + }, + { + "llm_name": "glm-4-flash", + "tags": "LLM,CHAT,", + "max_tokens": 128000, + "model_type": "chat" + }, + { + "llm_name": "glm-4-long", + "tags": "LLM,CHAT,", + "max_tokens": 1000000, + "model_type": "chat" + }, + { + "llm_name": "glm-3-turbo", "tags": "LLM,CHAT,", "max_tokens": 128000, "model_type": "chat" @@ -153,6 +183,12 @@ "tags": "TEXT EMBEDDING", "max_tokens": 512, "model_type": "embedding" + }, + { + "llm_name": "embedding-3", + "tags": "TEXT EMBEDDING", + "max_tokens": 512, + "model_type": "embedding" } ] },