mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-06-04 11:24:40 +08:00

* feat: pdf-parser, implementation in scrapeURL
* use pdf-parser for page count instead of mu
* fix(pdf-parser): bindings
* feat(scrapeURL/pdf): adjust MILLISECONDS_PER_PAGE
* implement post-runsync polling and fix
* fix(Dockerfile): copy in the pdf-parser source code
* fix(scrapeURL/pdf): better error for timeout below 0
58 lines
2.0 KiB
Docker
58 lines
2.0 KiB
Docker
# Shared Node.js base stage: pnpm is made available through corepack and the
# whole build context is placed in /app. Later stages derive from this one.
FROM node:22-slim AS base

# PNPM_HOME is put at the front of PATH. The two variables are set in separate
# instructions because PATH refers to the PNPM_HOME value defined just above.
ENV PNPM_HOME="/pnpm"
ENV PATH="${PNPM_HOME}:${PATH}"

# Enable corepack so the `pnpm` command used by later stages is available.
RUN corepack enable

# Work from /app and copy the build context into it.
WORKDIR /app
COPY . .
|
|
|
|
# Production dependencies only, installed exactly as pinned by the lockfile.
FROM base AS prod-deps

# /pnpm/store is a BuildKit cache mount (id=pnpm), shared with the other
# install step that uses the same id.
RUN --mount=type=cache,id=pnpm,target=/pnpm/store \
    pnpm install --prod --frozen-lockfile
|
|
|
|
# Build stage: install all dependencies and run the project's build script.
FROM base AS build

# --frozen-lockfile mirrors the prod-deps stage: the install fails if the
# lockfile is out of date instead of silently resolving different versions,
# so the code is built against the same dependency graph that is shipped.
RUN --mount=type=cache,id=pnpm,target=/pnpm/store \
    pnpm install --frozen-lockfile && \
    pnpm run build
|
|
|
|
# Go stage: builds sharedLibs/go-html-to-md into a C shared library
# (html-to-markdown.so) that the final image copies out.
FROM golang:1.24 AS go-base

# WORKDIR replaces the former `RUN cd ... &&` prefix; the sources land in the
# same directory as before, so the artifact path is unchanged.
WORKDIR /app/sharedLibs/go-html-to-md
COPY sharedLibs/go-html-to-md .

# Install Go dependencies and build parser lib
RUN go mod tidy && \
    go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go && \
    chmod +x html-to-markdown.so
|
|
|
|
# Rust stage: builds the html-transformer and pdf-parser crates in release
# mode; the final image copies the resulting .so files out of target/release.
FROM rust:1-slim AS rust-base

COPY sharedLibs/html-transformer /app/sharedLibs/html-transformer
COPY sharedLibs/pdf-parser /app/sharedLibs/pdf-parser

# Install Rust dependencies and build transformer lib.
# WORKDIR replaces the former `RUN cd ... &&` prefix; output paths are unchanged.
WORKDIR /app/sharedLibs/html-transformer
RUN cargo build --release && \
    chmod +x target/release/libhtml_transformer.so

# Install Rust dependencies and build PDF parser lib
WORKDIR /app/sharedLibs/pdf-parser
RUN cargo build --release && \
    chmod +x target/release/libpdf_parser.so
|
|
|
|
# Final runtime image: the base stage plus the build output, the production
# node_modules and the native shared libraries produced by the other stages.
FROM base

COPY --from=build /app/dist /app/dist
COPY --from=prod-deps /app/node_modules /app/node_modules
COPY --from=go-base /app/sharedLibs/go-html-to-md/html-to-markdown.so /app/sharedLibs/go-html-to-md/html-to-markdown.so
COPY --from=rust-base /app/sharedLibs/html-transformer/target/release/libhtml_transformer.so /app/sharedLibs/html-transformer/target/release/libhtml_transformer.so
COPY --from=rust-base /app/sharedLibs/pdf-parser/target/release/libpdf_parser.so /app/sharedLibs/pdf-parser/target/release/libpdf_parser.so

# Install git
# NOTE(review): --no-install-recommends is deliberately not added here; git's
# recommended packages may be relied on at runtime - confirm before trimming.
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

# Documents the container port; EXPOSE does not publish it.
EXPOSE 8080

# Make sure the entrypoint script has the correct line endings
RUN sed -i 's/\r$//' /app/docker-entrypoint.sh

# Start the server by default, this can be overwritten at runtime.
# Exec (JSON) form runs the script directly instead of through `/bin/sh -c`,
# so it is not hidden behind an extra shell process. Any arguments given to
# `docker run` / CMD are now passed to the script as "$@".
ENTRYPOINT ["/app/docker-entrypoint.sh"]
|