Added doc on dev-slim (#2627)

Added doc on dev-slim ### Type of change - [x] Documentation Update - [x] Refactoring --------- Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2025-08-12 09:59:01 +08:00 · 2024-09-27 19:15:27 +08:00 · 2024-09-27 19:15:27 +08:00 · 1b2f66fc11
commit 1b2f66fc11
parent ca2de896c7
5 changed files with 186 additions and 1889 deletions
--- a/Dockerfile.scratch
+++ b/Dockerfile.scratch
@ -2,6 +2,8 @@
 FROM ubuntu:24.04 AS base
 USER root

+# LIGHTEN selects the dependency set installed later in this stage:
+# 1 (default) skips the optional "full" Poetry group, 0 installs it.
+# Declared as ARG so it can be overridden with `--build-arg LIGHTEN=0`,
+# then re-exported as ENV so later RUN steps and derived stages still see it.
+ARG LIGHTEN=1
+ENV LIGHTEN=${LIGHTEN}
+
 WORKDIR /ragflow

 RUN rm -f /etc/apt/apt.conf.d/docker-clean \
@ -43,7 +45,11 @@ RUN cd web && npm i --force && npm run build
 COPY pyproject.toml poetry.toml poetry.lock ./

+# Install the locked Python dependencies with Poetry. When LIGHTEN is 0 the
+# optional "full" group is added via --with=full; otherwise the install runs
+# without that group. The pypoetry cache directory is a locked build cache mount.
 RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
-    /root/.local/bin/poetry install --sync --no-cache --no-root
+    if [ "$LIGHTEN" -eq 0 ]; then \
+        /root/.local/bin/poetry install --sync --no-cache --no-root --with=full; \
+    else \
+        /root/.local/bin/poetry install --sync --no-cache --no-root; \
+    fi

 # production stage
 FROM base AS production
@ -77,9 +83,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
 # Download nltk data
 RUN python3 -m nltk.downloader wordnet punkt punkt_tab

-# Copy models downloaded via download_deps.sh
-# COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
-
 ENV PYTHONPATH=/ragflow/

 COPY docker/entrypoint.sh ./entrypoint.sh
--- a/docs/guides/develop/build_docker_image.md
+++ b/docs/guides/develop/build_docker_image.md
@ -31,13 +31,22 @@ To build a RAGFlow Docker image from source code:

 ```bash
 git clone https://github.com/infiniflow/ragflow.git
+cd ragflow
 ```

 ### Build the Docker Image

 Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference.

+#### Build image `ragflow:dev-slim`
+```bash
+docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
+```
+This image's size is about 1GB. It relies on external LLM services since it doesn't contain embedding models.
+
+#### Build image `ragflow:dev`
 ```bash
 cd ragflow/
-docker build -f Dockerfile.scratch -t infiniflow/ragflow:dev .
-```
+docker build -f Dockerfile -t infiniflow/ragflow:dev .
+```
+This image's size is about 11GB. It contains embedding models and can run inference on a local CPU/GPU or via external LLM services.
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -17,7 +17,6 @@ azure-storage-file-datalake = "12.16.0"
 anthropic = "=0.34.1"
 arxiv = "2.1.3"
 aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
-bcembedding = "0.1.3"
 bio = "1.7.1"
 boto3 = "1.34.140"
 botocore = "1.34.140"
@ -34,10 +33,8 @@ editdistance = "0.8.1"
 elastic-transport = "8.12.0"
 elasticsearch = "8.12.1"
 elasticsearch-dsl = "8.12.0"
-fastembed = "^0.3.6"
 fasttext = "0.9.3"
 filelock = "3.15.4"
-flagembedding = "1.2.10"
 flask = "3.0.3"
 flask-cors = "5.0.0"
 flask-login = "0.6.3"
@ -58,7 +55,6 @@ nltk = "3.9.1"
 numpy = "1.26.4"
 ollama = "0.2.1"
 onnxruntime = "1.17.3"
-onnxruntime-gpu = { version = "^1.17.1", markers = "platform_machine == 'x86_64'" }
 openai = "1.12.0"
 opencv-python = "4.9.0.80"
 opencv-python-headless = "4.9.0.80"
@ -97,8 +93,6 @@ tabulate = "0.9.0"
 tencentcloud-sdk-python = "3.0.1215"
 tika = "2.6.0"
 tiktoken = "0.6.0"
-torch = "2.3.0"
-transformers = "4.38.1"
 umap_learn = "0.5.6"
 vertexai = "1.64.0"
 volcengine = "1.0.146"
@ -107,7 +101,7 @@ webdriver-manager = "4.0.1"
 werkzeug = "3.0.3"
 wikipedia = "1.4.0"
 word2number = "1.1"
-xgboost = "2.1.0"
+xgboost = "1.5.0"
 xpinyin = "0.7.6"
 yfinance = "0.1.96"
 zhipuai = "2.0.1"
@ -117,12 +111,18 @@ python-docx = "^1.1.2"
 pypdf2 = "^3.0.1"
 graspologic = "^3.4.1"
 pymysql = "^1.1.1"
-mini-racer = "^0.12.4"

-[[tool.poetry.source]]
-name = "tsinghua"
-url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
-priority = "primary"
+
+[tool.poetry.group.full]
+optional = true
+
+[tool.poetry.group.full.dependencies]
+bcembedding = "0.1.3"
+fastembed = "^0.3.6"
+flagembedding = "1.2.10"
+mini-racer = "^0.12.4"
+torch = "2.3.0"
+transformers = "4.38.1"

 [build-system]
 requires = ["poetry-core"]
--- a/rag/settings.py
+++ b/rag/settings.py
@ -14,6 +14,7 @@
 #  limitations under the License.
 #
 import os
+import logging
 from api.utils import get_base_config, decrypt_database_config
 from api.utils.file_utils import get_project_base_directory
 from api.utils.log_utils import LoggerFactory, getLogger
@ -48,10 +49,16 @@ minio_logger = getLogger("minio")
 s3_logger = getLogger("s3")
 azure_logger = getLogger("azure")
 cron_logger = getLogger("cron_logger")
-cron_logger.setLevel(20)
 chunk_logger = getLogger("chunk_logger")
 database_logger = getLogger("database")

+for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
+    logger.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s",
+    )
+
+
 SVR_QUEUE_NAME = "rag_flow_svr_queue"
 SVR_QUEUE_RETENTION = 60*60
 SVR_QUEUE_MAX_LEN = 1024