From 10d9b65f96e4b44c209dcaef159601d2370d2059 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= <mo.geryy@gmail.com>
Date: Thu, 20 Feb 2025 07:57:06 +0100
Subject: [PATCH] fix(self-host): update docs and dockerignore

---
 SELF_HOST.md                             | 53 +++++++++++++++---------
 apps/playwright-service-ts/.dockerignore |  3 ++
 2 files changed, 36 insertions(+), 20 deletions(-)
 create mode 100644 apps/playwright-service-ts/.dockerignore

diff --git a/SELF_HOST.md b/SELF_HOST.md
index f572d1ea..cff3e82f 100644
--- a/SELF_HOST.md
+++ b/SELF_HOST.md
@@ -41,32 +41,45 @@ To start, we won't set up authentication or any optional subservices (pdf parsin
 `.env:`
 ```
 # ===== Required ENVS ======
-NUM_WORKERS_PER_QUEUE=8
 PORT=3002
 HOST=0.0.0.0
-REDIS_URL=redis://redis:6379
-REDIS_RATE_LIMIT_URL=redis://redis:6379
 
-## To turn on DB authentication, you need to set up Supabase.
+# To turn on DB authentication, you need to set up Supabase.
 USE_DB_AUTHENTICATION=false
 
 # ===== Optional ENVS ======
 
 # Supabase Setup (used to support DB authentication, advanced logging, etc.)
-SUPABASE_ANON_TOKEN=
-SUPABASE_URL=
-SUPABASE_SERVICE_TOKEN=
+# SUPABASE_ANON_TOKEN=
+# SUPABASE_URL=
+# SUPABASE_SERVICE_TOKEN=
 
-# Other Optionals
-TEST_API_KEY= # use if you've set up authentication and want to test with a real API key
-SCRAPING_BEE_API_KEY= # use if you'd like to use as a fallback scraper
-OPENAI_API_KEY= # add for LLM-dependent features (e.g., image alt generation)
-BULL_AUTH_KEY= @
-PLAYWRIGHT_MICROSERVICE_URL=  # set if you'd like to run a playwright fallback
-LLAMAPARSE_API_KEY= #Set if you have a llamaparse key you'd like to use to parse pdfs
-SLACK_WEBHOOK_URL= # set if you'd like to send slack server health status messages
-POSTHOG_API_KEY= # set if you'd like to send posthog events like job logs
-POSTHOG_HOST= # set if you'd like to send posthog events like job logs
+# Use if you've set up authentication and want to test with a real API key
+# TEST_API_KEY=
+
+# You can add this to enable ScrapingBee as a fallback scraping engine.
+# SCRAPING_BEE_API_KEY=
+
+# Needed for the JSON format on /scrape and for the /extract endpoint
+# OPENAI_API_KEY=
+
+# This key lets you access the queue admin panel. Change this if your deployment is publicly accessible.
+BULL_AUTH_KEY=CHANGEME
+
+# These are now autoconfigured by docker-compose.yaml. You shouldn't need to set them.
+# PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape
+# REDIS_URL=redis://redis:6379
+# REDIS_RATE_LIMIT_URL=redis://redis:6379
+
+# Set if you have a LlamaParse API key you'd like to use to parse PDFs
+# LLAMAPARSE_API_KEY=
+
+# Set if you'd like to send server health status messages to Slack
+# SLACK_WEBHOOK_URL=
+
+# Set if you'd like to send PostHog events such as job logs
+# POSTHOG_API_KEY=
+# POSTHOG_HOST=
 ```
 
 3.  Build and run the Docker containers:
@@ -78,9 +91,9 @@ POSTHOG_HOST= # set if you'd like to send posthog events like job logs
 
 This will run a local instance of Firecrawl which can be accessed at `http://localhost:3002`.
 
-You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/@/queues`.
+You should be able to see the Bull Queue Manager UI on `http://localhost:3002/admin/CHANGEME/queues`.
 
-5. *(Optional)* Test the API
+4. *(Optional)* Test the API
 
 If you’d like to test the crawl endpoint, you can run this:
 
@@ -88,7 +101,7 @@ If you’d like to test the crawl endpoint, you can run this:
   curl -X POST http://localhost:3002/v1/crawl \
       -H 'Content-Type: application/json' \
       -d '{
-        "url": "https://mendable.ai"
+        "url": "https://firecrawl.dev"
       }'
   ```   
 
diff --git a/apps/playwright-service-ts/.dockerignore b/apps/playwright-service-ts/.dockerignore
new file mode 100644
index 00000000..6c6badcd
--- /dev/null
+++ b/apps/playwright-service-ts/.dockerignore
@@ -0,0 +1,3 @@
+/node_modules/
+/dist/
+.env