From 10d9b65f96e4b44c209dcaef159601d2370d2059 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Thu, 20 Feb 2025 07:57:06 +0100 Subject: [PATCH] fix(self-host): update docs and dockerignore --- SELF_HOST.md | 53 +++++++++++++++--------- apps/playwright-service-ts/.dockerignore | 3 ++ 2 files changed, 36 insertions(+), 20 deletions(-) create mode 100644 apps/playwright-service-ts/.dockerignore diff --git a/SELF_HOST.md b/SELF_HOST.md index f572d1ea..cff3e82f 100644 --- a/SELF_HOST.md +++ b/SELF_HOST.md @@ -41,32 +41,45 @@ To start, we won't set up authentication or any optional subservices (pdf parsin `.env:` ``` # ===== Required ENVS ====== -NUM_WORKERS_PER_QUEUE=8 PORT=3002 HOST=0.0.0.0 -REDIS_URL=redis://redis:6379 -REDIS_RATE_LIMIT_URL=redis://redis:6379 -## To turn on DB authentication, you need to set up Supabase. +# To turn on DB authentication, you need to set up Supabase. USE_DB_AUTHENTICATION=false # ===== Optional ENVS ====== # Supabase Setup (used to support DB authentication, advanced logging, etc.) 
-SUPABASE_ANON_TOKEN= -SUPABASE_URL= -SUPABASE_SERVICE_TOKEN= +# SUPABASE_ANON_TOKEN= +# SUPABASE_URL= +# SUPABASE_SERVICE_TOKEN= -# Other Optionals -TEST_API_KEY= # use if you've set up authentication and want to test with a real API key -SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper -OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation) -BULL_AUTH_KEY= @ -PLAYWRIGHT_MICROSERVICE_URL= # set if you'd like to run a playwright fallback -LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs -SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages -POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs -POSTHOG_HOST= # set if you'd like to send posthog events like job logs +# Use if you've set up authentication and want to test with a real API key +# TEST_API_KEY= + +# You can add this to enable ScrapingBee as a fallback scraping engine. +# SCRAPING_BEE_API_KEY= + +# Needed for JSON format on scrape and /extract endpoint +# OPENAI_API_KEY= + +# This key lets you access the queue admin panel. Change this if your deployment is publicly accessible. +BULL_AUTH_KEY=CHANGEME + +# These are now autoconfigured by the docker-compose.yaml. You shouldn't need to set them. +# PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape +# REDIS_URL=redis://redis:6379 +# REDIS_RATE_LIMIT_URL=redis://redis:6379 + +# Set if you have a llamaparse key you'd like to use to parse pdfs +# LLAMAPARSE_API_KEY= + +# Set if you'd like to send server health status messages to Slack +# SLACK_WEBHOOK_URL= + +# Set if you'd like to send posthog events like job logs +# POSTHOG_API_KEY= +# POSTHOG_HOST= ``` 3. Build and run the Docker containers: @@ -78,9 +91,9 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`. 
-You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/@/queues`. +You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/CHANGEME/queues`. -5. *(Optional)* Test the API +4. *(Optional)* Test the API If you’d like to test the crawl endpoint, you can run this: @@ -88,7 +101,7 @@ If you’d like to test the crawl endpoint, you can run this: curl -X POST http://localhost:3002/v1/crawl \ -H 'Content-Type: application/json' \ -d '{ - "url": "https://mendable.ai" + "url": "https://firecrawl.dev" }' ``` diff --git a/apps/playwright-service-ts/.dockerignore b/apps/playwright-service-ts/.dockerignore new file mode 100644 index 00000000..6c6badcd --- /dev/null +++ b/apps/playwright-service-ts/.dockerignore @@ -0,0 +1,3 @@ +/node_modules/ +/dist/ +.env