mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 07:19:04 +08:00
Updated Dockefile to use cache (#2703)
### What problem does this PR solve? Updated Dockefile to use cache ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [x] Other (please describe): CI
This commit is contained in:
parent
62202b7eff
commit
5f4d2dc4fe
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@ -48,7 +48,7 @@ jobs:
|
||||
- name: Build ragflow:dev-slim
|
||||
run: |
|
||||
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
|
||||
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co .
|
||||
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data .
|
||||
sudo docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
|
||||
- name: Build ragflow:dev
|
||||
|
35
Dockerfile
35
Dockerfile
@ -6,13 +6,18 @@ ENV LIGHTEN=0
|
||||
|
||||
WORKDIR /ragflow
|
||||
|
||||
RUN apt update && apt --no-install-recommends install -y ca-certificates
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
|
||||
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
|
||||
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt-get --no-install-recommends install -y ca-certificates
|
||||
|
||||
# if you located in China, you can use tsinghua mirror to speed up apt
|
||||
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
|
||||
|
||||
RUN apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
|
||||
|
||||
@ -22,7 +27,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
|
||||
ENV POETRY_NO_INTERACTION=1
|
||||
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
|
||||
ENV POETRY_VIRTUALENVS_CREATE=true
|
||||
ENV POETRY_KEYRING_ENABLED=false
|
||||
ENV POETRY_REQUESTS_TIMEOUT=15
|
||||
|
||||
# builder stage
|
||||
@ -31,16 +35,18 @@ USER root
|
||||
|
||||
WORKDIR /ragflow
|
||||
|
||||
RUN apt update && apt install -y nodejs npm cargo \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_builder_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y nodejs npm cargo && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY web web
|
||||
RUN cd web && npm i --force && npm run build
|
||||
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
|
||||
cd web && npm i --force && npm run build
|
||||
|
||||
# install dependencies from poetry.lock file
|
||||
COPY pyproject.toml poetry.toml poetry.lock ./
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
|
||||
RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
|
||||
if [ "$LIGHTEN" -eq 0 ]; then \
|
||||
poetry install --sync --no-cache --no-root --with=full; \
|
||||
else \
|
||||
@ -55,8 +61,9 @@ WORKDIR /ragflow
|
||||
|
||||
# Install python packages' dependencies
|
||||
# cv2 requires libGL.so.1
|
||||
RUN apt update && apt install -y --no-install-recommends nginx libgl1 vim less \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY web web
|
||||
COPY api api
|
||||
@ -82,16 +89,16 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
|
||||
/huggingface.co/maidalun1020/bce-reranker-base_v1 \
|
||||
| tar -xf - --strip-components=2 -C /root/.ragflow
|
||||
|
||||
# Copy nltk data downloaded via download_deps.py
|
||||
COPY nltk_data /root/nltk_data
|
||||
|
||||
# Copy compiled web pages
|
||||
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
|
||||
|
||||
# Copy Python environment and packages
|
||||
ENV VIRTUAL_ENV=/ragflow/.venv
|
||||
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
|
||||
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
|
||||
|
||||
# Download nltk data
|
||||
RUN python3 -m nltk.downloader wordnet punkt punkt_tab
|
||||
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
|
||||
|
||||
ENV PYTHONPATH=/ragflow/
|
||||
|
||||
|
@ -6,13 +6,18 @@ ENV LIGHTEN=1
|
||||
|
||||
WORKDIR /ragflow
|
||||
|
||||
RUN apt update && apt --no-install-recommends install -y ca-certificates
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
|
||||
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
|
||||
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt-get --no-install-recommends install -y ca-certificates
|
||||
|
||||
# if you located in China, you can use tsinghua mirror to speed up apt
|
||||
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
|
||||
|
||||
RUN apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus python3-poetry \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -o libssl1.deb http://archive.ubuntu.com/ubuntu/pool/main/o/openssl1.0/libssl1.0.0_1.0.2n-1ubuntu5_amd64.deb && dpkg -i libssl1.deb && rm -f libssl1.deb
|
||||
|
||||
@ -22,7 +27,6 @@ ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
|
||||
ENV POETRY_NO_INTERACTION=1
|
||||
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
|
||||
ENV POETRY_VIRTUALENVS_CREATE=true
|
||||
ENV POETRY_KEYRING_ENABLED=false
|
||||
ENV POETRY_REQUESTS_TIMEOUT=15
|
||||
|
||||
# builder stage
|
||||
@ -31,16 +35,18 @@ USER root
|
||||
|
||||
WORKDIR /ragflow
|
||||
|
||||
RUN apt update && apt install -y nodejs npm cargo \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_builder_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y nodejs npm cargo && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY web web
|
||||
RUN cd web && npm i --force && npm run build
|
||||
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
|
||||
cd web && npm i --force && npm run build
|
||||
|
||||
# install dependencies from poetry.lock file
|
||||
COPY pyproject.toml poetry.toml poetry.lock ./
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
|
||||
RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
|
||||
if [ "$LIGHTEN" -eq 0 ]; then \
|
||||
poetry install --sync --no-cache --no-root --with=full; \
|
||||
else \
|
||||
@ -55,8 +61,9 @@ WORKDIR /ragflow
|
||||
|
||||
# Install python packages' dependencies
|
||||
# cv2 requires libGL.so.1
|
||||
RUN apt update && apt install -y --no-install-recommends nginx libgl1 vim less \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY web web
|
||||
COPY api api
|
||||
@ -75,16 +82,16 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
|
||||
/huggingface.co/InfiniFlow/deepdoc \
|
||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||
|
||||
# Copy nltk data downloaded via download_deps.py
|
||||
COPY nltk_data /root/nltk_data
|
||||
|
||||
# Copy compiled web pages
|
||||
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
|
||||
|
||||
# Copy Python environment and packages
|
||||
ENV VIRTUAL_ENV=/ragflow/.venv
|
||||
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
|
||||
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
|
||||
|
||||
# Download nltk data
|
||||
RUN python3 -m nltk.downloader wordnet punkt punkt_tab
|
||||
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
|
||||
|
||||
ENV PYTHONPATH=/ragflow/
|
||||
|
||||
|
@ -220,7 +220,7 @@ This image is approximately 1 GB in size and relies on external LLM and embeddin
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```
|
||||
@ -232,7 +232,7 @@ This image is approximately 9 GB in size. As it includes embedding models, it re
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev .
|
||||
```
|
||||
|
@ -202,7 +202,7 @@
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```
|
||||
@ -214,7 +214,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev .
|
||||
```
|
||||
|
@ -204,7 +204,7 @@
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```
|
||||
@ -216,7 +216,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev .
|
||||
```
|
||||
|
@ -204,7 +204,7 @@
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```
|
||||
@ -216,7 +216,7 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
||||
```bash
|
||||
git clone https://github.com/infiniflow/ragflow.git
|
||||
cd ragflow/
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev .
|
||||
```
|
||||
|
@ -64,7 +64,7 @@ This image is approximately 1 GB in size and relies on external LLM services, as
|
||||
|
||||
On a `linux/amd64` host:
|
||||
```bash
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev-amd64 .
|
||||
docker push infiniflow/ragflow:dev-amd64
|
||||
@ -72,7 +72,7 @@ docker push infiniflow/ragflow:dev-amd64
|
||||
|
||||
On a `linux/arm64` host:
|
||||
```bash
|
||||
pip3 install huggingface-hub
|
||||
pip3 install huggingface-hub nltk
|
||||
python3 download_deps.py
|
||||
docker build -f Dockerfile -t infiniflow/ragflow:dev-arm64 .
|
||||
docker push infiniflow/ragflow:dev-arm64
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
import nltk
|
||||
import os
|
||||
|
||||
repos = [
|
||||
@ -12,13 +13,17 @@ repos = [
|
||||
"maidalun1020/bce-reranker-base_v1",
|
||||
]
|
||||
|
||||
|
||||
def download_model(repo_id):
|
||||
local_dir = os.path.join("huggingface.co", repo_id)
|
||||
local_dir = os.path.abspath(os.path.join("huggingface.co", repo_id))
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
snapshot_download(repo_id=repo_id, local_dir=local_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
local_dir = os.path.abspath('nltk_data')
|
||||
for data in ['wordnet', 'punkt', 'wordnet']:
|
||||
print(f"Downloading nltk {data}...")
|
||||
nltk.download(data, download_dir=local_dir)
|
||||
for repo_id in repos:
|
||||
print(f"Downloading huggingface repo {repo_id}...")
|
||||
download_model(repo_id)
|
||||
|
Loading…
x
Reference in New Issue
Block a user