Added doc on dev-slim (#2627)

Added doc on dev-slim

### Type of change

- [x] Documentation Update
- [x] Refactoring

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Zhichang Yu 2024-09-27 19:15:27 +08:00 committed by GitHub
parent ca2de896c7
commit 1b2f66fc11
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 186 additions and 1889 deletions

View File

@ -2,6 +2,8 @@
FROM ubuntu:24.04 AS base
USER root
ENV LIGHTEN=1
WORKDIR /ragflow
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
@ -43,7 +45,11 @@ RUN cd web && npm i --force && npm run build
COPY pyproject.toml poetry.toml poetry.lock ./
# Install Python dependencies with Poetry, reusing the BuildKit cache mount
# for Poetry's cache directory across builds.
# LIGHTEN=0 additionally installs the optional "full" dependency group;
# any other value installs only the default dependencies.
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
    if [ "$LIGHTEN" -eq 0 ]; then \
        /root/.local/bin/poetry install --sync --no-cache --no-root --with=full; \
    else \
        /root/.local/bin/poetry install --sync --no-cache --no-root; \
    fi
# production stage
FROM base AS production
@ -77,9 +83,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
# Download nltk data
RUN python3 -m nltk.downloader wordnet punkt punkt_tab
# Copy models downloaded via download_deps.sh
# COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
ENV PYTHONPATH=/ragflow/
COPY docker/entrypoint.sh ./entrypoint.sh

View File

@ -31,13 +31,22 @@ To build a RAGFlow Docker image from source code:
```bash
git clone https://github.com/infiniflow/ragflow.git
cd ragflow
```
### Build the Docker Image
Navigate to the `ragflow` directory, where the Dockerfile and other necessary files are located. You can now build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for later reference.
#### Build image `ragflow:dev-slim`
```bash
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```
This image is about 1 GB in size. It relies on external LLM services because it does not contain embedding models.
#### Build image `ragflow:dev`
```bash
cd ragflow/
docker build -f Dockerfile.scratch -t infiniflow/ragflow:dev .
```
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```
This image is about 11 GB in size. It contains embedding models and can run inference on a local CPU/GPU or through external LLM services.

2018
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,6 @@ azure-storage-file-datalake = "12.16.0"
anthropic = "=0.34.1"
arxiv = "2.1.3"
aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
bcembedding = "0.1.3"
bio = "1.7.1"
boto3 = "1.34.140"
botocore = "1.34.140"
@ -34,10 +33,8 @@ editdistance = "0.8.1"
elastic-transport = "8.12.0"
elasticsearch = "8.12.1"
elasticsearch-dsl = "8.12.0"
fastembed = "^0.3.6"
fasttext = "0.9.3"
filelock = "3.15.4"
flagembedding = "1.2.10"
flask = "3.0.3"
flask-cors = "5.0.0"
flask-login = "0.6.3"
@ -58,7 +55,6 @@ nltk = "3.9.1"
numpy = "1.26.4"
ollama = "0.2.1"
onnxruntime = "1.17.3"
onnxruntime-gpu = { version = "^1.17.1", markers = "platform_machine == 'x86_64'" }
openai = "1.12.0"
opencv-python = "4.9.0.80"
opencv-python-headless = "4.9.0.80"
@ -97,8 +93,6 @@ tabulate = "0.9.0"
tencentcloud-sdk-python = "3.0.1215"
tika = "2.6.0"
tiktoken = "0.6.0"
torch = "2.3.0"
transformers = "4.38.1"
umap_learn = "0.5.6"
vertexai = "1.64.0"
volcengine = "1.0.146"
@ -107,7 +101,7 @@ webdriver-manager = "4.0.1"
werkzeug = "3.0.3"
wikipedia = "1.4.0"
word2number = "1.1"
xgboost = "2.1.0"
xgboost = "1.5.0"
xpinyin = "0.7.6"
yfinance = "0.1.96"
zhipuai = "2.0.1"
@ -117,12 +111,18 @@ python-docx = "^1.1.2"
pypdf2 = "^3.0.1"
graspologic = "^3.4.1"
pymysql = "^1.1.1"
mini-racer = "^0.12.4"
[[tool.poetry.source]]
name = "tsinghua"
url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
priority = "primary"
[tool.poetry.group.full]
optional = true
[tool.poetry.group.full.dependencies]
bcembedding = "0.1.3"
fastembed = "^0.3.6"
flagembedding = "1.2.10"
mini-racer = "^0.12.4"
torch = "2.3.0"
transformers = "4.38.1"
[build-system]
requires = ["poetry-core"]

View File

@ -14,6 +14,7 @@
# limitations under the License.
#
import os
import logging
from api.utils import get_base_config, decrypt_database_config
from api.utils.file_utils import get_project_base_directory
from api.utils.log_utils import LoggerFactory, getLogger
@ -48,10 +49,16 @@ minio_logger = getLogger("minio")
s3_logger = getLogger("s3")
azure_logger = getLogger("azure")
cron_logger = getLogger("cron_logger")
cron_logger.setLevel(20)
chunk_logger = getLogger("chunk_logger")
database_logger = getLogger("database")
# Give every module-level logger the same INFO threshold and line format.
# `basicConfig` is a function of the `logging` module, not a method of a
# logger object, so the level is set on each logger and the format on each
# handler already attached to it.
# NOTE(review): assumes getLogger() returns standard logging.Logger objects
# with their handlers already attached — confirm against api.utils.log_utils.
_log_formatter = logging.Formatter("%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s")
for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
    logger.setLevel(logging.INFO)
    for handler in logger.handlers:
        handler.setFormatter(_log_formatter)
SVR_QUEUE_NAME = "rag_flow_svr_queue"
SVR_QUEUE_RETENTION = 60*60
SVR_QUEUE_MAX_LEN = 1024