diff --git a/docs/xinference.md b/docs/xinference.md
new file mode 100644
index 000000000..2a8feef83
--- /dev/null
+++ b/docs/xinference.md
@@ -0,0 +1,43 @@
+# Xinference
+
+
+

+
+
+Xorbits Inference ([Xinference](https://github.com/xorbitsai/inference)) empowers you to unleash the full potential of cutting-edge AI models.
+
+## Install
+
+- [pip install "xinference[all]"](https://inference.readthedocs.io/en/latest/getting_started/installation.html)
+- [Docker](https://inference.readthedocs.io/en/latest/getting_started/using_docker_image.html)
+
+To start a local instance of Xinference, run the following command:
+```bash
+$ xinference-local --host 0.0.0.0 --port 9997
+```
+## Launch Xinference
+
+Decide which LLM you want to deploy ([here's a list of supported LLMs](https://inference.readthedocs.io/en/latest/models/builtin/)), say, **mistral**.
+Execute the following command to launch the model. Remember to replace `${quantization}` with your chosen quantization method from the options listed for that model in the list linked above:
+```bash
+$ xinference launch -u mistral --model-name mistral-v0.1 --size-in-billions 7 --model-format pytorch --quantization ${quantization}
+```
+
+## Use Xinference in RAGFlow
+
+- Go to 'Settings > Model Providers > Models to be added > Xinference'.
+
+
+

+
+
+> Base URL: Enter the base URL where the Xinference service is accessible, e.g. `http://<your-xinference-endpoint-domain>:9997/v1`.
+
+- Use Xinference Models.
+
+
+

+
+
+

+
\ No newline at end of file
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 4b966991b..010883a82 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -161,9 +161,10 @@ class OllamaCV(Base):
except Exception as e:
return "**ERROR**: " + str(e), 0
+
class XinferenceCV(Base):
def __init__(self, key, model_name="", lang="Chinese", base_url=""):
- self.client = OpenAI(api_key=key, base_url=base_url)
+ self.client = OpenAI(api_key="xxx", base_url=base_url)
self.model_name = model_name
self.lang = lang