diff --git a/.github/workflows/test-server-self-host.yml b/.github/workflows/test-server-self-host.yml
index 301353df..26d3aad3 100644
--- a/.github/workflows/test-server-self-host.yml
+++ b/.github/workflows/test-server-self-host.yml
@@ -15,6 +15,7 @@ env:
   ENV: ${{ secrets.ENV }}
   TEST_SUITE_SELF_HOSTED: true
   USE_GO_MARKDOWN_PARSER: true
+  FIRECRAWL_DEBUG_FILTER_LINKS: true
 
 jobs:
   test:
diff --git a/.github/workflows/test-server.yml b/.github/workflows/test-server.yml
index 188cbeb7..66156261 100644
--- a/.github/workflows/test-server.yml
+++ b/.github/workflows/test-server.yml
@@ -21,6 +21,7 @@ env:
   SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
   SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
   SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
+  SUPABASE_REPLICA_URL: ${{ secrets.SUPABASE_REPLICA_URL }}
   TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
   FIRE_ENGINE_BETA_URL: ${{ secrets.FIRE_ENGINE_BETA_URL }}
   USE_DB_AUTHENTICATION: true
@@ -72,18 +73,20 @@ jobs:
           chmod +x html-to-markdown.so
         working-directory: ./apps/api/sharedLibs/go-html-to-md
       - name: Start the application
-        run: npm start &
+        run: npm start > api.log 2>&1 &
         working-directory: ./apps/api
         id: start_app
       - name: Start worker
-        run: npm run workers &
+        run: npm run workers > worker.log 2>&1 &
         working-directory: ./apps/api
         id: start_workers
       - name: Start index worker
        run: npm run index-worker &
         working-directory: ./apps/api
         id: start_index_worker
+      - name: Wait for API
+        run: pnpx wait-on tcp:3002 -t 15s
       - name: Run snippet tests
         run: |
           npm run test:snips
-        working-directory: ./apps/api
+        working-directory: ./apps/api
\ No newline at end of file
diff --git a/apps/api/src/__tests__/snips/billing.test.ts b/apps/api/src/__tests__/snips/billing.test.ts
index 314c3f7a..b3639fcb 100644
--- a/apps/api/src/__tests__/snips/billing.test.ts
+++ b/apps/api/src/__tests__/snips/billing.test.ts
@@ -125,13 +125,18 @@ describe("Billing tests", () => {
       })
     ]);
 
+    expect(crawl1.success).toBe(true);
+    expect(crawl2.success).toBe(true);
+
     // sum: x+5y credits
 
     await sleepForBatchBilling();
 
     const rc2 = (await creditUsage()).remaining_credits;
 
-    expect(rc1 - rc2).toBe(crawl1.body.completed + crawl2.body.completed * 5);
+    if (crawl1.success && crawl2.success) {
+      expect(rc1 - rc2).toBe(crawl1.completed + crawl2.completed * 5);
+    }
   }, 300000);
 
   it("bills map correctly", async () => {
diff --git a/apps/api/src/__tests__/snips/crawl.test.ts b/apps/api/src/__tests__/snips/crawl.test.ts
index 67d5a181..f388243d 100644
--- a/apps/api/src/__tests__/snips/crawl.test.ts
+++ b/apps/api/src/__tests__/snips/crawl.test.ts
@@ -7,4 +7,50 @@ describe("Crawl tests", () => {
       limit: 10,
     });
   }, 120000);
+
+  it.concurrent("filters URLs properly", async () => {
+    const res = await crawl({
+      url: "https://firecrawl.dev/pricing",
+      includePaths: ["^/pricing$"],
+      limit: 10,
+    });
+
+    expect(res.success).toBe(true);
+    if (res.success) {
+      expect(res.completed).toBe(1);
+      expect(res.data[0].metadata.sourceURL).toBe("https://firecrawl.dev/pricing");
+    }
+  }, 120000);
+
+  it.concurrent("filters URLs properly when using regexOnFullURL", async () => {
+    const res = await crawl({
+      url: "https://firecrawl.dev/pricing",
+      includePaths: ["^https://(www\\.)?firecrawl\\.dev/pricing$"],
+      regexOnFullURL: true,
+      limit: 10,
+    });
+
+    expect(res.success).toBe(true);
+    if (res.success) {
+      expect(res.completed).toBe(1);
+      expect(res.data[0].metadata.sourceURL).toBe("https://firecrawl.dev/pricing");
+    }
+  }, 120000);
+
+  it.concurrent("discovers URLs properly when origin is not included", async () => {
+    const res = await crawl({
+      url: "https://firecrawl.dev",
+      includePaths: ["^/blog"],
+      ignoreSitemap: true,
+      limit: 10,
+    });
+
+    expect(res.success).toBe(true);
+    if (res.success) {
+      expect(res.data.length).toBeGreaterThan(1);
+      for (const page of res.data) {
+        expect(page.metadata.url ?? page.metadata.sourceURL).toMatch(/^https:\/\/(www\.)?firecrawl\.dev\/blog/);
+      }
+    }
+  }, 120000);
 });
diff --git a/apps/api/src/__tests__/snips/lib.ts b/apps/api/src/__tests__/snips/lib.ts
index c296f465..fb1f6cff 100644
--- a/apps/api/src/__tests__/snips/lib.ts
+++ b/apps/api/src/__tests__/snips/lib.ts
@@ -1,7 +1,7 @@
 import { configDotenv } from "dotenv";
 configDotenv();
 
-import { ScrapeRequestInput, Document, ExtractRequestInput, ExtractResponse, CrawlRequestInput, MapRequestInput, BatchScrapeRequestInput, SearchRequestInput } from "../../controllers/v1/types";
+import { ScrapeRequestInput, Document, ExtractRequestInput, ExtractResponse, CrawlRequestInput, MapRequestInput, BatchScrapeRequestInput, SearchRequestInput, CrawlStatusResponse } from "../../controllers/v1/types";
 import request from "supertest";
 
 // =========================================
@@ -69,7 +69,7 @@ function expectCrawlToSucceed(response: Awaited<ReturnType<typeof crawlStatus>>)
   expect(response.body.data.length).toBeGreaterThan(0);
 }
 
-export async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
+export async function crawl(body: CrawlRequestInput): Promise<CrawlStatusResponse> {
   const cs = await crawlStart(body);
   expectCrawlStartToSucceed(cs);
 
@@ -82,7 +82,7 @@ export async function crawl(body: CrawlRequestInput): ReturnType<typeof crawlStatus> {
     blockAds: false,
   });
 
-  expect(response.markdown).toContain(".g.doubleclick.net/");
+  expect(response.markdown).toMatch(/(\.g\.doubleclick\.net|amazon-adsystem\.com)\//);
 }, 30000);
 });
diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts
index efb5e094..4f946f03 100644
--- a/apps/api/src/controllers/v1/types.ts
+++ b/apps/api/src/controllers/v1/types.ts
@@ -448,6 +448,7 @@ const crawlerOptions = z
     ignoreSitemap: z.boolean().default(false),
     deduplicateSimilarURLs: z.boolean().default(true),
     ignoreQueryParameters: z.boolean().default(false),
+    regexOnFullURL: z.boolean().default(false),
   })
   .strict(strictMessage);
 
@@ -791,6 +792,7 @@ export function toLegacyCrawlerOptions(x: CrawlerOptions) {
     ignoreSitemap: x.ignoreSitemap,
     deduplicateSimilarURLs: x.deduplicateSimilarURLs,
     ignoreQueryParameters: x.ignoreQueryParameters,
+    regexOnFullURL: x.regexOnFullURL,
   };
 }
 
@@ -811,6 +813,7 @@ export function fromLegacyCrawlerOptions(x: any): {
       ignoreSitemap: x.ignoreSitemap,
       deduplicateSimilarURLs: x.deduplicateSimilarURLs,
       ignoreQueryParameters: x.ignoreQueryParameters,
+      regexOnFullURL: x.regexOnFullURL,
     }),
     internalOptions: {
       v0CrawlOnlyUrls: x.returnOnlyUrls,
diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts
index 526ba235..256d7435 100644
--- a/apps/api/src/lib/crawl-redis.ts
+++ b/apps/api/src/lib/crawl-redis.ts
@@ -398,6 +398,7 @@ export function crawlToCrawler(
       sc.crawlerOptions?.allowExternalContentLinks ?? false,
     allowSubdomains: sc.crawlerOptions?.allowSubdomains ?? false,
     ignoreRobotsTxt: sc.crawlerOptions?.ignoreRobotsTxt ?? false,
+    regexOnFullURL: sc.crawlerOptions?.regexOnFullURL ?? false,
   });
 
   if (sc.robots !== undefined) {
diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 126520e2..ea93110a 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -28,6 +28,7 @@ export class WebCrawler {
   private allowExternalContentLinks: boolean;
   private allowSubdomains: boolean;
   private ignoreRobotsTxt: boolean;
+  private regexOnFullURL: boolean;
   private logger: typeof _logger;
   private sitemapsHit: Set<string> = new Set();
 
@@ -45,6 +46,7 @@ export class WebCrawler {
     allowExternalContentLinks = false,
     allowSubdomains = false,
     ignoreRobotsTxt = false,
+    regexOnFullURL = false,
   }: {
     jobId: string;
     initialUrl: string;
@@ -59,6 +61,7 @@ export class WebCrawler {
     allowExternalContentLinks?: boolean;
     allowSubdomains?: boolean;
     ignoreRobotsTxt?: boolean;
+    regexOnFullURL?: boolean;
   }) {
     this.jobId = jobId;
     this.initialUrl = initialUrl;
@@ -76,6 +79,7 @@ export class WebCrawler {
     this.allowExternalContentLinks = allowExternalContentLinks ?? false;
     this.allowSubdomains = allowSubdomains ?? false;
     this.ignoreRobotsTxt = ignoreRobotsTxt ?? false;
+    this.regexOnFullURL = regexOnFullURL ?? false;
     this.logger = _logger.child({ crawlId: this.jobId, module: "WebCrawler" });
   }
 
@@ -115,11 +119,13 @@ export class WebCrawler {
         return false;
       }
 
+      const excincPath = this.regexOnFullURL ? link : path;
+
       // Check if the link should be excluded
       if (this.excludes.length > 0 && this.excludes[0] !== "") {
         if (
           this.excludes.some((excludePattern) =>
-            new RegExp(excludePattern).test(path),
+            new RegExp(excludePattern).test(excincPath),
           )
         ) {
           if (process.env.FIRECRAWL_DEBUG_FILTER_LINKS) {
@@ -133,7 +139,7 @@
       if (this.includes.length > 0 && this.includes[0] !== "") {
         if (
           !this.includes.some((includePattern) =>
-            new RegExp(includePattern).test(path),
+            new RegExp(includePattern).test(excincPath),
           )
         ) {
           if (process.env.FIRECRAWL_DEBUG_FILTER_LINKS) {
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 86375ec2..6287cf95 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -1112,6 +1112,11 @@ async function processJob(job: Job & { id: string }, token: string) {
           // });
         }
       }
+
+      // Only run check after adding new jobs for discovery - mogery
+      if (job.data.isCrawlSourceScrape && crawler.filterLinks([doc.metadata.url ?? doc.metadata.sourceURL!], 1, sc.crawlerOptions?.maxDepth ?? 10).length === 0) {
+        throw new Error("Source URL is not allowed by includePaths/excludePaths rules")
+      }
     }
   }
 
diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 6d3093b8..006e3737 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -173,6 +173,7 @@ export interface CrawlParams {
   };
   deduplicateSimilarURLs?: boolean;
   ignoreQueryParameters?: boolean;
+  regexOnFullURL?: boolean;
 }
 
 /**
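
For reviewers, a minimal sketch of how the new regexOnFullURL flag could be exercised from the JS SDK once the CrawlParams change above ships. It mirrors the regex used in the new crawl.test.ts case; the FirecrawlApp.crawlUrl call, includePaths option, and the success/completed fields on the result are assumptions based on the existing SDK surface, not documented behavior of this PR.

import FirecrawlApp from "@mendable/firecrawl-js";

// Sketch only: assumes the regexOnFullURL field added to CrawlParams in this diff
// is available in the published SDK types.
const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY ?? "" });

async function main() {
  const result = await app.crawlUrl("https://firecrawl.dev/pricing", {
    // With regexOnFullURL: true, include/exclude patterns are matched against the
    // full URL ("https://firecrawl.dev/pricing") rather than just the path ("/pricing").
    includePaths: ["^https://(www\\.)?firecrawl\\.dev/pricing$"],
    regexOnFullURL: true,
    limit: 10,
  });

  if (result.success) {
    console.log(`crawled ${result.completed} page(s)`);
  }
}

main();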