mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-02 08:47:33 +08:00

### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/4319

This pull request includes several changes to improve the Docker setup and documentation for the project. The most important changes include updating the Dockerfile to support modern versions of Rust, adding a new Docker Compose configuration for macOS, and updating the build instructions in the documentation.

Improvements to Docker setup:

* [`Dockerfile`](diffhunk://#diff-dd2c0eb6ea5cfc6c4bd4eac30934e2d5746747af48fef6da689e85b752f39557L80-R107): Added installation steps for a modern version of Rust and updated the logic for installing the correct ODBC driver based on the architecture.
* [`docker/docker-compose-macos.yml`](diffhunk://#diff-8e8587143bb2442c02f6dff4caa217ebbe3ba4ec8e7c23b2e568886a67b00eafR1-R56): Added a new Docker Compose configuration file specifically for macOS, including service dependencies, environment variables, and volume mappings.

Updates to documentation:

* [`docs/guides/develop/build_docker_image.mdx`](diffhunk://#diff-d6136bb897f7245aae33b0accbcf7c508ceaef005c545f9f09cad3cada840a19L44-R44): Updated the build instructions to use the new Docker Compose configuration for macOS instead of the previous Docker build command.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Documentation Update

---------

Signed-off-by: Samuel Giffard <samuel.giffard@mytomorrows.com>
210 lines
8.2 KiB
Docker
210 lines
8.2 KiB
Docker
# base stage: common foundation that both the builder and production stages start from
FROM ubuntu:22.04 AS base
USER root
# RUN steps below use bash-only test syntax such as `[ "$X" == "1" ]`
SHELL ["/bin/bash", "-c"]

# NEED_MIRROR=1 switches apt, pip/uv and rustup downloads to the TUNA mirrors
ARG NEED_MIRROR=0
# LIGHTEN=1 skips the bundled embedding models and the optional Python extras ("slim" image)
ARG LIGHTEN=0
# Also exported as an environment variable, so stages built FROM base can read it
ENV LIGHTEN=${LIGHTEN}

WORKDIR /ragflow
|
|
|
|
# Copy models downloaded via download_deps.py.
# The files are read from a bind mount of the infiniflow/ragflow_deps image.
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow

# huqie.txt.trie is copied as-is. The two model directories are streamed through tar so that
# dot-files are excluded and the three leading path components (huggingface.co/InfiniFlow/<name>)
# are stripped, placing their contents directly under /ragflow/rag/res/deepdoc.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
|
# Unless LIGHTEN=1, also unpack the BAAI and maidalun1020 models into /root/.ragflow.
# --strip-components=2 removes "huggingface.co/<org>", leaving one directory per model.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    if [ "$LIGHTEN" != "1" ]; then \
        tar -cf - \
            /huggingface.co/BAAI/bge-large-zh-v1.5 \
            /huggingface.co/BAAI/bge-reranker-v2-m3 \
            /huggingface.co/maidalun1020/bce-embedding-base_v1 \
            /huggingface.co/maidalun1020/bce-reranker-base_v1 \
            | tar -xf - --strip-components=2 -C /root/.ragflow; \
    fi
|
|
|
|
# https://github.com/chrismattmann/tika-python
# python-tika normally checks the Tika version and pulls the latest jar from Apache on every run.
# Shipping the jar (plus its .md5) and pointing TIKA_SERVER_JAR at it is the only way to run
# python-tika without internet access.
# NOTE(review): the tiktoken file is stored under a hash-like name, presumably the cache key
# tiktoken looks up for cl100k_base - confirm against the code that reads it.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    cp -r /deps/nltk_data /root/ && \
    cp /deps/tika-server-standard-3.0.0.jar \
       /deps/tika-server-standard-3.0.0.jar.md5 \
       /ragflow/ && \
    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
|
|
ENV DEBIAN_FRONTEND=noninteractive

# Setup apt
# Python packages and the system libraries they implicitly depend on:
#   opencv-python: libglib2.0-0 libglx-mesa0 libgl1
#   aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
#   python-pptx: default-jdk tika-server-standard-3.0.0.jar
#   selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
#   Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
#
# /var/cache/apt is a BuildKit cache mount, so docker-clean is removed and apt is told to keep
# downloaded packages; the .deb files then live in the build cache instead of the image layer.
# ca-certificates is installed first and the index refreshed again before the remaining packages.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    if [ "$NEED_MIRROR" = "1" ]; then \
        sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt update && \
    apt --no-install-recommends install -y ca-certificates && \
    apt update && \
    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
    apt install -y pkg-config libicu-dev libgdiplus && \
    apt install -y default-jdk && \
    apt install -y libatk-bridge2.0-0 && \
    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
    apt install -y python3-pip pipx nginx unzip curl wget git vim less
|
|
|
|
# With NEED_MIRROR=1, point pip at the TUNA PyPI mirror and write /etc/uv/uv.toml so that
# uv uses the same mirror as its default index. uv itself is always installed through pipx.
RUN if [ "$NEED_MIRROR" = "1" ]; then \
        pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
        pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn; \
        mkdir -p /etc/uv && \
        printf '%s\n' \
            '[[index]]' \
            'url = "https://pypi.tuna.tsinghua.edu.cn/simple"' \
            'default = true' \
            > /etc/uv/uv.toml; \
    fi; \
    pipx install uv

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
# /root/.local/bin is prepended so the pipx-installed uv can be called by name in later steps
ENV PATH=/root/.local/bin:$PATH
|
|
|
|
# nodejs 12.22 on Ubuntu 22.04 is too old, so register the NodeSource 20.x repository,
# purge the distro nodejs/npm/cargo packages and install nodejs from the new source.
# `set -o pipefail` makes a failed download of the setup script abort the build; without it
# the pipeline's status is that of `bash -`, which succeeds on empty input.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt purge -y nodejs npm cargo && \
    apt autoremove -y && \
    apt update && \
    apt install -y nodejs
|
|
|
|
# A modern version of cargo is needed for the latest version of the Rust compiler,
# so Rust is installed through rustup (minimal profile).
# - The apt cache mount matches the other apt steps: docker-clean was removed earlier, so without
#   the mount the downloaded .deb files would be stored in this layer.
# - Every step is chained with && and pipefail is enabled, so a failed apt command or a failed
#   download of the rustup script stops the build here rather than at the version check below.
# - With NEED_MIRROR=1 the TUNA mirrors are used for rustup/rust dist files.
# - curl is forced to use HTTP/1.1.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    apt update && \
    apt install -y curl build-essential && \
    if [ "$NEED_MIRROR" == "1" ]; then \
        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup" && \
        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup" && \
        echo "Using TUNA mirrors for Rustup."; \
    fi && \
    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal && \
    echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc

ENV PATH="/root/.cargo/bin:${PATH}"

# Fail the build early if the toolchain is not on PATH
RUN cargo --version && rustc --version
|
|
|
|
# Add the MSSQL ODBC driver from the Microsoft apt repository.
#   arm64 / aarch64 (e.g. images built on Apple Silicon): install msodbcsql18.
#   x86_64 and anything else: install msodbcsql17.
# All steps are chained with && and pipefail is enabled, so a failure while importing the key,
# downloading the repository list or refreshing the index reaches the error handler at the end
# instead of being discarded. curl -f turns HTTP errors into failures rather than writing an
# error page into the key or the sources list.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    set -o pipefail && \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl -fsSL https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt update && \
    arch="$(uname -m)" && \
    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
    else \
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
    fi || \
    { echo "Failed to install ODBC driver"; exit 1; }
|
|
|
|
|
|
|
|
# Add dependencies of selenium: Chrome 121 and the matching chromedriver,
# both taken from zip archives shipped in the infiniflow/ragflow_deps image.
# Chrome is unpacked to /opt/chrome and linked into /usr/local/bin.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip && \
    mv chrome-linux64 /opt/chrome && \
    ln -s /opt/chrome/chrome /usr/local/bin/chrome

# -j drops the directory prefix so only the chromedriver binary is extracted;
# any /usr/bin/google-chrome is removed afterwards.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
    mv chromedriver /usr/local/bin/chromedriver && \
    rm -f /usr/bin/google-chrome
|
|
|
|
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
# Install the libssl1.1 package that matches the machine architecture; other architectures are skipped.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    case "$(uname -m)" in \
        x86_64)  dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb ;; \
        aarch64) dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb ;; \
    esac
|
|
|
|
|
|
# builder stage: resolves the Python environment, builds the web front end and records the version
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from uv.lock file
# Only the dependency manifests are copied at this point.
COPY pyproject.toml uv.lock ./
|
|
|
|
# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes,
# so the lock file is rewritten to the index that matches NEED_MIRROR before syncing.
#
# The Dockerfile reference scopes an ARG to the stage that declares it, so NEED_MIRROR is
# redeclared here (same default as in the base stage) to make sure --build-arg NEED_MIRROR=1
# is visible to this RUN. LIGHTEN needs no redeclaration because the base stage also sets it
# as an ENV.
ARG NEED_MIRROR=0
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|pypi.org|pypi.tuna.tsinghua.edu.cn|g' uv.lock; \
    else \
        sed -i 's|pypi.tuna.tsinghua.edu.cn|pypi.org|g' uv.lock; \
    fi; \
    if [ "$LIGHTEN" == "1" ]; then \
        uv sync --python 3.10 --frozen; \
    else \
        uv sync --python 3.10 --frozen --all-extras; \
    fi
|
|
|
|
# Build the web front end; /root/.npm is a cache mount shared between builds.
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && \
    npm install && \
    npm run build
|
|
|
|
# Derive the version string from git and append the image flavor ("slim" when LIGHTEN=1,
# otherwise "full"); the result is printed and written to /ragflow/VERSION.
COPY .git /ragflow/.git

RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
    if [ "$LIGHTEN" == "1" ]; then flavor="slim"; else flavor="full"; fi; \
    version_info="$version_info $flavor"; \
    echo "RAGFlow version: $version_info"; \
    echo $version_info > /ragflow/VERSION
|
|
|
|
# production stage: starts again from base and receives only the artifacts produced by builder
FROM base AS production
USER root

WORKDIR /ragflow

# Copy Python environment and packages
# The virtual environment created by `uv sync` is copied to the same path and put first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV PYTHONPATH=/ragflow/
|
|
|
|
# Application sources, one COPY per top-level directory
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY pyproject.toml uv.lock ./

# Service configuration template and the container entrypoint scripts
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh docker/entrypoint-parser.sh ./
RUN chmod +x ./entrypoint.sh ./entrypoint-parser.sh
|
|
|
|
# Copy compiled web pages and the VERSION file produced in the builder stage
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION

# Exec form; the path is relative to WORKDIR /ragflow
ENTRYPOINT ["./entrypoint.sh"]
|