Added tika jar into image to avoid downloading (#3167)

### What problem does this PR solve?

Added tika jar into image to avoid downloading. Close #3017

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Zhichang Yu 2024-11-03 00:20:26 +08:00 committed by GitHub
parent c7ea7e9974
commit c06e765a5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 13 additions and 1 deletions

View File

@ -48,7 +48,7 @@ jobs:
- name: Build ragflow:dev-slim
run: |
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb .
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* .
sudo docker pull ubuntu:24.04
sudo docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .

View File

@ -104,6 +104,11 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
# Copy nltk data downloaded via download_deps.py
COPY nltk_data /root/nltk_data
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
COPY tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 ./
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"
# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

View File

@ -97,6 +97,11 @@ RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
# Copy nltk data downloaded via download_deps.py
COPY nltk_data /root/nltk_data
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
COPY tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 ./
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"
# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

View File

@ -7,6 +7,8 @@ import urllib.request
urls = [
"http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
"https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
"https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
]
repos = [