From afe9269534fabc1db1984ece9a87e5c7c214142e Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Tue, 4 Mar 2025 12:49:39 +0800 Subject: [PATCH] Introduced jemalloc (#5590) ### What problem does this PR solve? Introduced jemalloc. Python uses pymalloc (which is a reimplementation of glibc malloc) to manage RES. It has pools for small objects to avoid returning memory to the OS aggressively. My experience is: Replacing pymalloc with [jemalloc](https://github.com/jemalloc/jemalloc) can reduce RES and speed up task_executor.py. ### Type of change - [x] Performance Improvement --- Dockerfile | 1 + docker/entrypoint-parser.sh | 3 ++- docker/entrypoint.sh | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7c518d411..4bd8ec94f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,6 +59,7 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ apt install -y default-jdk && \ apt install -y libatk-bridge2.0-0 && \ apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \ + apt install -y libjemalloc-dev && \ apt install -y python3-pip pipx nginx unzip curl wget git vim less RUN if [ "$NEED_MIRROR" == "1" ]; then \ diff --git a/docker/entrypoint-parser.sh b/docker/entrypoint-parser.sh index 899f16f8f..07dc241f8 100755 --- a/docker/entrypoint-parser.sh +++ b/docker/entrypoint-parser.sh @@ -15,8 +15,9 @@ CONSUMER_NO_BEG=$1 CONSUMER_NO_END=$2 function task_exe(){ + JEMALLOC_PATH=$(pkg-config --variable=libdir jemalloc)/libjemalloc.so while [ 1 -eq 1 ]; do - $PY rag/svr/task_executor.py $1; + LD_PRELOAD=$JEMALLOC_PATH $PY rag/svr/task_executor.py $1; done } diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 388102960..37bfb29b6 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -17,8 +17,9 @@ if [[ -z "$WS" || $WS -lt 1 ]]; then fi function task_exe(){ + JEMALLOC_PATH=$(pkg-config --variable=libdir jemalloc)/libjemalloc.so while [ 1 -eq 1 ];do - $PY rag/svr/task_executor.py 
$1; + LD_PRELOAD=$JEMALLOC_PATH $PY rag/svr/task_executor.py $1; done }