firecrawl/apps/api/Dockerfile
Gergő Móricz b03670a8b7
feat: parse PDFs on fc side and reject if too long for timeout (FIR-2083) (#1592)
* feat: pdf-parser, implementation in scrapeURL

* use pdf-parser for page count instead of mu

* fix(pdf-parser): bindings

* feat(scrapeURL/pdf): adjust MILLISECONDS_PER_PAGE

* implement post-runsync polling and fix

* fix(Dockerfile): copy in the pdf-parser source code

* fix(scrapeURL/pdf): better error for timeout below 0
2025-05-23 13:45:53 +02:00

58 lines
2.0 KiB
Docker

# Shared foundation for the Node stages: Node 22 (slim) with corepack enabled
# and the whole build context copied into /app.
FROM node:22-slim AS base

# pnpm's home directory, prepended to PATH.
# Kept as two ENV instructions on purpose: substitution inside a single ENV
# instruction uses the values from before that instruction, so $PNPM_HOME
# would not yet be set if both were merged.
ENV PNPM_HOME="/pnpm"
ENV PATH="$PNPM_HOME:$PATH"

# Enable the corepack shims so the pnpm commands in later stages resolve.
RUN corepack enable

# Make /app the working directory, then copy the build context into it.
WORKDIR /app
COPY . .
# Production-only dependency tree, installed strictly from the lockfile.
# The final stage copies /app/node_modules out of this stage.
FROM base AS prod-deps
RUN --mount=type=cache,id=pnpm,target=/pnpm/store \
    pnpm install --prod --frozen-lockfile
# Build stage: install the full dependency set and run the project's build
# script. The final stage copies /app/dist out of this stage.
FROM base AS build
# --frozen-lockfile matches the prod-deps stage, so the build is compiled
# against exactly the locked versions instead of silently re-resolving them.
RUN --mount=type=cache,id=pnpm,target=/pnpm/store \
    pnpm install --frozen-lockfile && \
    pnpm run build
# Go builder stage: compiles the HTML-to-Markdown library into a C shared
# object. Only the resulting .so is copied into the final image.
FROM golang:1.24 AS go-base
COPY sharedLibs/go-html-to-md /app/sharedLibs/go-html-to-md
# Use WORKDIR rather than `cd` inside RUN (hadolint DL3003); the output path
# of the artifact is unchanged.
WORKDIR /app/sharedLibs/go-html-to-md
# Install Go dependencies and build parser lib
RUN go mod tidy && \
    go build -o html-to-markdown.so -buildmode=c-shared html-to-markdown.go && \
    chmod +x html-to-markdown.so
# Rust builder stage: compiles the HTML transformer and PDF parser crates in
# release mode. Only the resulting .so files are copied into the final image.
FROM rust:1-slim AS rust-base
COPY sharedLibs/html-transformer /app/sharedLibs/html-transformer
COPY sharedLibs/pdf-parser /app/sharedLibs/pdf-parser

# Use WORKDIR rather than `cd` inside RUN (hadolint DL3003); the output paths
# of both artifacts are unchanged.

# Install Rust dependencies and build transformer lib
WORKDIR /app/sharedLibs/html-transformer
RUN cargo build --release && \
    chmod +x target/release/libhtml_transformer.so

# Install Rust dependencies and build PDF parser lib
WORKDIR /app/sharedLibs/pdf-parser
RUN cargo build --release && \
    chmod +x target/release/libpdf_parser.so
# Runtime image: starts from the base stage (which already contains the copied
# build context in /app) and adds the artifacts produced by the other stages.
FROM base

# Compiled application output and production-only node_modules.
COPY --from=build /app/dist /app/dist
COPY --from=prod-deps /app/node_modules /app/node_modules

# Native shared libraries, placed at the same paths they were built at.
COPY --from=go-base /app/sharedLibs/go-html-to-md/html-to-markdown.so /app/sharedLibs/go-html-to-md/html-to-markdown.so
COPY --from=rust-base /app/sharedLibs/html-transformer/target/release/libhtml_transformer.so /app/sharedLibs/html-transformer/target/release/libhtml_transformer.so
COPY --from=rust-base /app/sharedLibs/pdf-parser/target/release/libpdf_parser.so /app/sharedLibs/pdf-parser/target/release/libpdf_parser.so

# Install git; the apt package lists are removed in the same layer.
# NOTE(review): --no-install-recommends is intentionally not added here, since
# git's recommended packages may be relied on at runtime; confirm before trimming.
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

# Documentation only (EXPOSE does not publish the port).
# NOTE(review): presumably the port the API server listens on; confirm.
EXPOSE 8080

# Make sure the entrypoint script has the correct line endings
RUN sed -i 's/\r$//' /app/docker-entrypoint.sh

# Exec (JSON) form: the script is started directly instead of through
# `/bin/sh -c`, so it receives stop signals itself and any arguments given at
# `docker run` are passed to it rather than dropped.
# NOTE(review): assumes the script is executable and starts with a shebang.
ENTRYPOINT ["/app/docker-entrypoint.sh"]