From 497aa5d25ef0c2d4ed5421689c57bae64b759dd5 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 17:55:45 +0200 Subject: [PATCH 1/7] Update Kubernetes configs for playwright-service, api, and worker Added new ConfigMap for playwright-service and adjusted existing references. Applied imagePullPolicy: Always to ensure all images are updated promptly. Updated README to include --no-cache for Docker build instructions. --- examples/kubernetes/cluster-install/README.md | 4 ++-- examples/kubernetes/cluster-install/api.yaml | 5 +++-- examples/kubernetes/cluster-install/configmap.yaml | 6 ++---- .../kubernetes/cluster-install/playwright-service.yaml | 10 +++++++++- examples/kubernetes/cluster-install/worker.yaml | 5 +++-- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/examples/kubernetes/cluster-install/README.md b/examples/kubernetes/cluster-install/README.md index f874d829..2ae39893 100644 --- a/examples/kubernetes/cluster-install/README.md +++ b/examples/kubernetes/cluster-install/README.md @@ -4,12 +4,12 @@ 2. Build Docker images, and host it in your Docker Registry (replace the target registry with your own) 1. API (which is also used as a worker image) 1. ```bash - docker build -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/api + docker build --no-cache -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/api docker push ghcr.io/winkk-dev/firecrawl:latest ``` 2. Playwright 1. ```bash - docker build -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/playwright-service + docker build --no-cache -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/playwright-service docker push ghcr.io/winkk-dev/firecrawl-playwright:latest ``` 3. 
Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml) diff --git a/examples/kubernetes/cluster-install/api.yaml b/examples/kubernetes/cluster-install/api.yaml index cdc69c3d..81e61839 100644 --- a/examples/kubernetes/cluster-install/api.yaml +++ b/examples/kubernetes/cluster-install/api.yaml @@ -17,14 +17,15 @@ spec: containers: - name: api image: ghcr.io/winkk-dev/firecrawl:latest + imagePullPolicy: Always args: [ "pnpm", "run", "start:production" ] ports: - containerPort: 3002 envFrom: - configMapRef: name: firecrawl-config - - secretRef: - name: firecrawl-secret + #- secretRef: + # name: firecrawl-secret --- apiVersion: v1 kind: Service diff --git a/examples/kubernetes/cluster-install/configmap.yaml b/examples/kubernetes/cluster-install/configmap.yaml index b415d562..32559de7 100644 --- a/examples/kubernetes/cluster-install/configmap.yaml +++ b/examples/kubernetes/cluster-install/configmap.yaml @@ -7,8 +7,6 @@ data: PORT: "3002" HOST: "0.0.0.0" REDIS_URL: "redis://redis:6379" - PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000" + PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000/html" USE_DB_AUTHENTICATION: "false" - SUPABASE_ANON_TOKEN: "" - SUPABASE_URL: "" - SUPABASE_SERVICE_TOKEN: "" + HDX_NODE_BETA_MODE: "1" diff --git a/examples/kubernetes/cluster-install/playwright-service.yaml b/examples/kubernetes/cluster-install/playwright-service.yaml index ce794253..e916d914 100644 --- a/examples/kubernetes/cluster-install/playwright-service.yaml +++ b/examples/kubernetes/cluster-install/playwright-service.yaml @@ -1,3 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: playwright-service-config +data: + PORT: "3000" +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -17,11 +24,12 @@ spec: containers: - name: playwright-service image: ghcr.io/winkk-dev/firecrawl-playwright:latest + imagePullPolicy: Always ports: - containerPort: 3000 envFrom: - configMapRef: - 
name: firecrawl-config + name: playwright-service-config --- apiVersion: v1 kind: Service diff --git a/examples/kubernetes/cluster-install/worker.yaml b/examples/kubernetes/cluster-install/worker.yaml index 2b3b2e79..545beaa3 100644 --- a/examples/kubernetes/cluster-install/worker.yaml +++ b/examples/kubernetes/cluster-install/worker.yaml @@ -17,8 +17,9 @@ spec: containers: - name: worker image: ghcr.io/winkk-dev/firecrawl:latest + imagePullPolicy: Always envFrom: - configMapRef: name: firecrawl-config - - secretRef: - name: firecrawl-secret + #- secretRef: + # name: firecrawl-secret From be9e7f9edf039b523a53a6ea1e38e5975ae1433b Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 18:54:16 +0200 Subject: [PATCH 2/7] Add liveness and readiness health endpoints to api and playwright-service Added livenessController and readinessController to the API and registered them as GET /v0/health/liveness and GET /v0/health/readiness. Added /health/liveness and /health/readiness endpoints to the playwright service; the readiness endpoint returns 503 when no browser instance is available. 
--- apps/api/src/controllers/liveness.ts | 6 ++++++ apps/api/src/controllers/readiness.ts | 6 ++++++ apps/api/src/routes/v0.ts | 5 +++++ apps/playwright-service/main.py | 20 +++++++++++++++++--- 4 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 apps/api/src/controllers/liveness.ts create mode 100644 apps/api/src/controllers/readiness.ts diff --git a/apps/api/src/controllers/liveness.ts b/apps/api/src/controllers/liveness.ts new file mode 100644 index 00000000..8ff1a96f --- /dev/null +++ b/apps/api/src/controllers/liveness.ts @@ -0,0 +1,6 @@ +import { Request, Response } from "express"; + +export async function livenessController(req: Request, res: Response) { + //TODO: add checks if the application is live and healthy like checking the redis connection + res.status(200).json({ status: "ok" }); +} diff --git a/apps/api/src/controllers/readiness.ts b/apps/api/src/controllers/readiness.ts new file mode 100644 index 00000000..cdb1f02c --- /dev/null +++ b/apps/api/src/controllers/readiness.ts @@ -0,0 +1,6 @@ +import { Request, Response } from "express"; + +export async function readinessController(req: Request, res: Response) { + // TODO: add checks when the application is ready to serve traffic + res.status(200).json({ status: "ok" }); +} diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts index a9a3a9bf..4284b77c 100644 --- a/apps/api/src/routes/v0.ts +++ b/apps/api/src/routes/v0.ts @@ -7,6 +7,8 @@ import { crawlJobStatusPreviewController } from "../../src/controllers/status"; import { searchController } from "../../src/controllers/search"; import { crawlCancelController } from "../../src/controllers/crawl-cancel"; import { keyAuthController } from "../../src/controllers/keyAuth"; +import {livenessController} from "../controllers/liveness"; +import {readinessController} from "../controllers/readiness"; export const v0Router = express.Router(); @@ -23,3 +25,6 @@ v0Router.get("/v0/keyAuth", keyAuthController); // Search routes 
v0Router.post("/v0/search", searchController); +// Health/Probe routes +v0Router.get("/v0/health/liveness", livenessController); +v0Router.get("/v0/health/readiness", readinessController); diff --git a/apps/playwright-service/main.py b/apps/playwright-service/main.py index bd6b14e3..c9099d3b 100644 --- a/apps/playwright-service/main.py +++ b/apps/playwright-service/main.py @@ -5,7 +5,7 @@ the HTML content of a specified URL. It supports optional proxy settings and med from os import environ -from fastapi import FastAPI +from fastapi import FastAPI, Response from fastapi.responses import JSONResponse from playwright.async_api import Browser, async_playwright from pydantic import BaseModel @@ -39,14 +39,28 @@ async def shutdown_event(): """Event handler for application shutdown to close the browser.""" await browser.close() +@app.get("/health/liveness") +def liveness_probe(): + """Endpoint for liveness probe.""" + return JSONResponse(content={"status": "ok"}, status_code=200) + + +@app.get("/health/readiness") +async def readiness_probe(): + """Endpoint for readiness probe. Checks if the browser instance is ready.""" + if browser: + return JSONResponse(content={"status": "ok"}, status_code=200) + return JSONResponse(content={"status": "Service Unavailable"}, status_code=503) + + @app.post("/html") async def root(body: UrlModel): """ Endpoint to fetch and return HTML content of a given URL. - + Args: body (UrlModel): The URL model containing the target URL, wait time, and timeout. - + Returns: JSONResponse: The HTML content of the page. """ From 895e80caa421e07f14f7c7e7496e2a86f72db198 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 19:00:23 +0200 Subject: [PATCH 3/7] Add liveness and readiness probes to Kubernetes configs Introduced liveness and readiness probes for the Playwright service, API, and worker components. 
This ensures that Kubernetes can better manage the health and availability of these services by periodically checking their endpoints. This enhancement will improve the robustness and reliability of the deployed applications. --- examples/kubernetes/cluster-install/api.yaml | 20 +++++++++++++++++++ .../cluster-install/playwright-service.yaml | 20 +++++++++++++++++++ .../kubernetes/cluster-install/worker.yaml | 20 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/examples/kubernetes/cluster-install/api.yaml b/examples/kubernetes/cluster-install/api.yaml index 81e61839..c709857c 100644 --- a/examples/kubernetes/cluster-install/api.yaml +++ b/examples/kubernetes/cluster-install/api.yaml @@ -26,6 +26,26 @@ spec: name: firecrawl-config #- secretRef: # name: firecrawl-secret + - livenessProbe: + httpGet: + path: /v0/health/liveness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + name: api-container + - readinessProbe: + httpGet: + path: /v0/health/readiness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + name: api-container --- apiVersion: v1 kind: Service diff --git a/examples/kubernetes/cluster-install/playwright-service.yaml b/examples/kubernetes/cluster-install/playwright-service.yaml index e916d914..a9c24d42 100644 --- a/examples/kubernetes/cluster-install/playwright-service.yaml +++ b/examples/kubernetes/cluster-install/playwright-service.yaml @@ -30,6 +30,26 @@ spec: envFrom: - configMapRef: name: playwright-service-config + - livenessProbe: + httpGet: + path: /health/liveness + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + name: playwright-service-container + - readinessProbe: + httpGet: + path: /health/readiness + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + 
failureThreshold: 3 + name: playwright-service-container --- apiVersion: v1 kind: Service diff --git a/examples/kubernetes/cluster-install/worker.yaml b/examples/kubernetes/cluster-install/worker.yaml index 545beaa3..b904bd78 100644 --- a/examples/kubernetes/cluster-install/worker.yaml +++ b/examples/kubernetes/cluster-install/worker.yaml @@ -23,3 +23,23 @@ spec: name: firecrawl-config #- secretRef: # name: firecrawl-secret + - livenessProbe: + httpGet: + path: /v0/health/liveness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + name: worker-container + - readinessProbe: + httpGet: + path: /v0/health/readiness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + name: worker-container \ No newline at end of file From f26bda2477b84e728a742a21268af863fea8218a Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 19:06:19 +0200 Subject: [PATCH 4/7] Update Docker build paths in Kubernetes setup README Corrected relative paths for Docker build commands to ensure the appropriate directories are targeted. This fix is crucial for successful image builds and deployment consistency in the Kubernetes cluster setup. --- examples/kubernetes/cluster-install/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/kubernetes/cluster-install/README.md b/examples/kubernetes/cluster-install/README.md index 2ae39893..736ae038 100644 --- a/examples/kubernetes/cluster-install/README.md +++ b/examples/kubernetes/cluster-install/README.md @@ -4,12 +4,12 @@ 2. Build Docker images, and host it in your Docker Registry (replace the target registry with your own) 1. API (which is also used as a worker image) 1. 
```bash - docker build --no-cache -t ghcr.io/winkk-dev/firecrawl:latest ../../apps/api + docker build --no-cache -t ghcr.io/winkk-dev/firecrawl:latest ../../../apps/api docker push ghcr.io/winkk-dev/firecrawl:latest ``` 2. Playwright 1. ```bash - docker build --no-cache -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../apps/playwright-service + docker build --no-cache -t ghcr.io/winkk-dev/firecrawl-playwright:latest ../../../apps/playwright-service docker push ghcr.io/winkk-dev/firecrawl-playwright:latest ``` 3. Replace the image in [worker.yaml](worker.yaml), [api.yaml](api.yaml) and [playwright-service.yaml](playwright-service.yaml) From d68f3491099cfe3d20af700e352200cf6808a365 Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 19:31:37 +0200 Subject: [PATCH 5/7] Update Kubernetes YAMLs and add worker service Refactored container configurations in worker, api, and playwright-service YAMLs to streamline syntax and add missing fields. Added a service definition for the worker component and included a new environment variable in the configmap for rate-limiting. These changes enhance configuration clarity and ensure proper resource definitions. 
--- examples/kubernetes/cluster-install/api.yaml | 60 ++++++++--------- .../kubernetes/cluster-install/configmap.yaml | 1 + .../cluster-install/playwright-service.yaml | 54 +++++++-------- .../kubernetes/cluster-install/worker.yaml | 67 +++++++++++-------- 4 files changed, 95 insertions(+), 87 deletions(-) diff --git a/examples/kubernetes/cluster-install/api.yaml b/examples/kubernetes/cluster-install/api.yaml index c709857c..54ecfbf6 100644 --- a/examples/kubernetes/cluster-install/api.yaml +++ b/examples/kubernetes/cluster-install/api.yaml @@ -15,37 +15,35 @@ spec: imagePullSecrets: - name: docker-registry-secret containers: - - name: api - image: ghcr.io/winkk-dev/firecrawl:latest - imagePullPolicy: Always - args: [ "pnpm", "run", "start:production" ] - ports: - - containerPort: 3002 - envFrom: - - configMapRef: - name: firecrawl-config - #- secretRef: - # name: firecrawl-secret - - livenessProbe: - httpGet: - path: /v0/health/liveness - port: 3002 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: api-container - - readinessProbe: - httpGet: - path: /v0/health/readiness - port: 3002 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: api-container + - name: api + image: ghcr.io/winkk-dev/firecrawl:latest + imagePullPolicy: Always + args: [ "pnpm", "run", "start:production" ] + ports: + - containerPort: 3002 + envFrom: + - configMapRef: + name: firecrawl-config + #- secretRef: + # name: firecrawl-secret + livenessProbe: + httpGet: + path: /v0/health/liveness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /v0/health/readiness + port: 3002 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 --- apiVersion: v1 kind: Service diff --git 
a/examples/kubernetes/cluster-install/configmap.yaml b/examples/kubernetes/cluster-install/configmap.yaml index 32559de7..b56cfbcd 100644 --- a/examples/kubernetes/cluster-install/configmap.yaml +++ b/examples/kubernetes/cluster-install/configmap.yaml @@ -7,6 +7,7 @@ data: PORT: "3002" HOST: "0.0.0.0" REDIS_URL: "redis://redis:6379" + REDIS_RATE_LIMIT_URL: "redis://redis:6379" PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000/html" USE_DB_AUTHENTICATION: "false" HDX_NODE_BETA_MODE: "1" diff --git a/examples/kubernetes/cluster-install/playwright-service.yaml b/examples/kubernetes/cluster-install/playwright-service.yaml index a9c24d42..43cf15f0 100644 --- a/examples/kubernetes/cluster-install/playwright-service.yaml +++ b/examples/kubernetes/cluster-install/playwright-service.yaml @@ -22,34 +22,32 @@ spec: imagePullSecrets: - name: docker-registry-secret containers: - - name: playwright-service - image: ghcr.io/winkk-dev/firecrawl-playwright:latest - imagePullPolicy: Always - ports: - - containerPort: 3000 - envFrom: - - configMapRef: - name: playwright-service-config - - livenessProbe: - httpGet: - path: /health/liveness - port: 3000 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: playwright-service-container - - readinessProbe: - httpGet: - path: /health/readiness - port: 3000 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: playwright-service-container + - name: playwright-service + image: ghcr.io/winkk-dev/firecrawl-playwright:latest + imagePullPolicy: Always + ports: + - containerPort: 3000 + envFrom: + - configMapRef: + name: playwright-service-config + livenessProbe: + httpGet: + path: /health/liveness + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health/readiness + port: 3000 + initialDelaySeconds: 
30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 --- apiVersion: v1 kind: Service diff --git a/examples/kubernetes/cluster-install/worker.yaml b/examples/kubernetes/cluster-install/worker.yaml index b904bd78..6185f991 100644 --- a/examples/kubernetes/cluster-install/worker.yaml +++ b/examples/kubernetes/cluster-install/worker.yaml @@ -15,31 +15,42 @@ spec: imagePullSecrets: - name: docker-registry-secret containers: - - name: worker - image: ghcr.io/winkk-dev/firecrawl:latest - imagePullPolicy: Always - envFrom: - - configMapRef: - name: firecrawl-config - #- secretRef: - # name: firecrawl-secret - - livenessProbe: - httpGet: - path: /v0/health/liveness - port: 3002 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: worker-container - - readinessProbe: - httpGet: - path: /v0/health/readiness - port: 3002 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - name: worker-container \ No newline at end of file + - name: worker + image: ghcr.io/winkk-dev/firecrawl:latest + imagePullPolicy: Always + args: [ "pnpm", "run", "workers" ] + envFrom: + - configMapRef: + name: firecrawl-config + #- secretRef: + # name: firecrawl-secret + livenessProbe: + httpGet: + path: /v0/health/liveness + port: 3003 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /v0/health/readiness + port: 3003 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 +--- +apiVersion: v1 +kind: Service +metadata: + name: worker +spec: + selector: + app: worker + ports: + - protocol: TCP + port: 3003 + targetPort: 3003 From 2dc7be38693310484b7eaf61f0b4e85b167bceef Mon Sep 17 00:00:00 2001 From: Jakob Stadlhuber Date: Wed, 24 Jul 2024 19:38:54 +0200 Subject: [PATCH 6/7] Remove liveness and 
readiness probes from worker.yaml This commit removes the liveness and readiness probes configuration from the Kubernetes worker manifest. Additionally, a Service definition for the worker application has been removed. These changes might be necessary to update the deployment strategy or simplify the configuration. --- .../kubernetes/cluster-install/worker.yaml | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/examples/kubernetes/cluster-install/worker.yaml b/examples/kubernetes/cluster-install/worker.yaml index 6185f991..8e992cf1 100644 --- a/examples/kubernetes/cluster-install/worker.yaml +++ b/examples/kubernetes/cluster-install/worker.yaml @@ -24,33 +24,3 @@ spec: name: firecrawl-config #- secretRef: # name: firecrawl-secret - livenessProbe: - httpGet: - path: /v0/health/liveness - port: 3003 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - readinessProbe: - httpGet: - path: /v0/health/readiness - port: 3003 - initialDelaySeconds: 30 - periodSeconds: 30 - timeoutSeconds: 5 - successThreshold: 1 - failureThreshold: 3 ---- -apiVersion: v1 -kind: Service -metadata: - name: worker -spec: - selector: - app: worker - ports: - - protocol: TCP - port: 3003 - targetPort: 3003 From 7129d7993ee1d51ab171d6d747a84ef111e1146c Mon Sep 17 00:00:00 2001 From: Nicolas Date: Thu, 25 Jul 2024 17:19:45 -0400 Subject: [PATCH 7/7] Update v0.ts --- apps/api/src/routes/v0.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/routes/v0.ts b/apps/api/src/routes/v0.ts index 4284b77c..9c68d9bb 100644 --- a/apps/api/src/routes/v0.ts +++ b/apps/api/src/routes/v0.ts @@ -7,8 +7,8 @@ import { crawlJobStatusPreviewController } from "../../src/controllers/status"; import { searchController } from "../../src/controllers/search"; import { crawlCancelController } from "../../src/controllers/crawl-cancel"; import { keyAuthController } from "../../src/controllers/keyAuth"; -import 
{livenessController} from "../controllers/liveness"; -import {readinessController} from "../controllers/readiness"; +import { livenessController } from "../controllers/liveness"; +import { readinessController } from "../controllers/readiness"; export const v0Router = express.Router();