Make it lighter (#2577)

### What problem does this PR solve?

#2295

### Type of change

- [x] Refactoring
This commit is contained in:
Kevin Hu 2024-09-25 13:38:40 +08:00 committed by GitHub
parent e4c9cf2264
commit dda1367ab2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 13 additions and 12 deletions

View File

@ -14,7 +14,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \ apt update && apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& curl -sSL https://install.python-poetry.org | python3 - && curl -sSL https://install.python-poetry.org | python3 -
@ -33,7 +33,7 @@ USER root
WORKDIR /ragflow WORKDIR /ragflow
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y nodejs npm cargo && \ apt update && apt install -y nodejs npm && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
COPY web web COPY web web
@ -42,7 +42,10 @@ RUN cd web && npm i --force && npm run build
# install dependencies from poetry.lock file # install dependencies from poetry.lock file
COPY pyproject.toml poetry.toml poetry.lock ./ COPY pyproject.toml poetry.toml poetry.lock ./
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \ RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-root /root/.local/bin/poetry lock
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-cache --no-root
# production stage # production stage
FROM base AS production FROM base AS production
@ -76,8 +79,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
# Download nltk data # Download nltk data
RUN python3 -m nltk.downloader wordnet punkt punkt_tab RUN python3 -m nltk.downloader wordnet punkt punkt_tab
# Copy models downloaded via download_deps.sh
COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
ENV PYTHONPATH=/ragflow/ ENV PYTHONPATH=/ragflow/

View File

@ -99,7 +99,7 @@ tika = "2.6.0"
tiktoken = "0.6.0" tiktoken = "0.6.0"
torch = "2.3.0" torch = "2.3.0"
transformers = "4.38.1" transformers = "4.38.1"
umap = "0.1.1" umap_learn = "0.5.6"
vertexai = "1.64.0" vertexai = "1.64.0"
volcengine = "1.0.146" volcengine = "1.0.146"
voyageai = "0.2.3" voyageai = "0.2.3"

View File

@ -245,8 +245,8 @@ class FastEmbed(Base):
threads: Optional[int] = None, threads: Optional[int] = None,
**kwargs, **kwargs,
): ):
from fastembed import TextEmbedding if not LIGHTEN and not FastEmbed._model:
if not FastEmbed._model: from fastembed import TextEmbedding
self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs) self._model = TextEmbedding(model_name, cache_dir, threads, **kwargs)
def encode(self, texts: list, batch_size=32): def encode(self, texts: list, batch_size=32):
@ -291,8 +291,8 @@ class YoudaoEmbed(Base):
_client = None _client = None
def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs): def __init__(self, key=None, model_name="maidalun1020/bce-embedding-base_v1", **kwargs):
from BCEmbedding import EmbeddingModel as qanthing if not LIGHTEN and not YoudaoEmbed._client:
if not YoudaoEmbed._client: from BCEmbedding import EmbeddingModel as qanthing
try: try:
print("LOADING BCE...") print("LOADING BCE...")
YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join( YoudaoEmbed._client = qanthing(model_name_or_path=os.path.join(

View File

@ -109,8 +109,8 @@ class YoudaoRerank(DefaultRerank):
_model_lock = threading.Lock() _model_lock = threading.Lock()
def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs): def __init__(self, key=None, model_name="maidalun1020/bce-reranker-base_v1", **kwargs):
from BCEmbedding import RerankerModel if not LIGHTEN and not YoudaoRerank._model:
if not YoudaoRerank._model: from BCEmbedding import RerankerModel
with YoudaoRerank._model_lock: with YoudaoRerank._model_lock:
if not YoudaoRerank._model: if not YoudaoRerank._model:
try: try: