diff --git a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts index 45b3c31e..40686c45 100644 --- a/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_full_withAuth/index.test.ts @@ -1,6 +1,7 @@ import request from "supertest"; import dotenv from "dotenv"; import { v4 as uuidv4 } from "uuid"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; dotenv.config(); @@ -58,9 +59,7 @@ describe("E2E Tests for API Routes", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ); + expect(response.body.error).toContain(BLOCKLISTED_URL_MESSAGE); }); // tested on rate limit test @@ -480,9 +479,7 @@ describe("E2E Tests for API Routes", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ); + expect(response.body.error).toContain(BLOCKLISTED_URL_MESSAGE); }); it.concurrent( diff --git a/apps/api/src/__tests__/e2e_noAuth/index.test.ts b/apps/api/src/__tests__/e2e_noAuth/index.test.ts index e30352a5..9d5dc554 100644 --- a/apps/api/src/__tests__/e2e_noAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_noAuth/index.test.ts @@ -1,5 +1,6 @@ import request from "supertest"; import dotenv from "dotenv"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; const fs = require("fs"); const path = require("path"); @@ -61,9 +62,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ); + expect(response.body.error).toContain(BLOCKLISTED_URL_MESSAGE); }); it("should return a successful response", async () => { @@ -88,9 +87,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ); + expect(response.body.error).toContain(BLOCKLISTED_URL_MESSAGE); }); it("should return a successful response", async () => { @@ -119,9 +116,7 @@ describe("E2E Tests for API Routes with No Authentication", () => { .set("Content-Type", "application/json") .send({ url: blocklistedUrl }); expect(response.statusCode).toBe(403); - expect(response.body.error).toContain( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ); + expect(response.body.error).toContain(BLOCKLISTED_URL_MESSAGE); }); it("should return a successful response", async () => { diff --git a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts index 35ee2d89..39e0aa85 100644 --- a/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_v1_withAuth/index.test.ts @@ -4,6 +4,7 @@ import { ScrapeRequestInput, ScrapeResponseRequestTest, } from "../../controllers/v1/types"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; configDotenv(); const TEST_URL = "http://127.0.0.1:3002"; @@ -57,9 +58,7 @@ describe("E2E Tests for v1 API Routes", () => { .send(scrapeRequest); expect(response.statusCode).toBe(403); - expect(response.body.error).toBe( - "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.", - ); + expect(response.body.error).toBe(BLOCKLISTED_URL_MESSAGE); }); it.concurrent( @@ -756,9 +755,7 @@ describe("E2E Tests for v1 API Routes", () => { .send(scrapeRequest); expect(response.statusCode).toBe(403); - expect(response.body.error).toBe( - "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.", - ); + expect(response.body.error).toBe(BLOCKLISTED_URL_MESSAGE); }); it.concurrent( diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts index 36b8309f..ceeaa436 100644 --- a/apps/api/src/controllers/v0/crawl.ts +++ b/apps/api/src/controllers/v0/crawl.ts @@ -29,6 +29,7 @@ import * as Sentry from "@sentry/node"; import { getJobPriority } from "../../lib/job-priority"; import { fromLegacyScrapeOptions, url as urlSchema } from "../v1/types"; import { ZodError } from "zod"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; export async function crawlController(req: Request, res: Response) { try { @@ -112,8 +113,7 @@ export async function crawlController(req: Request, res: Response) { if (isUrlBlocked(url)) { return res.status(403).json({ - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + error: BLOCKLISTED_URL_MESSAGE, }); } diff --git a/apps/api/src/controllers/v0/crawlPreview.ts b/apps/api/src/controllers/v0/crawlPreview.ts index 405e49c2..f9462c3d 100644 --- a/apps/api/src/controllers/v0/crawlPreview.ts +++ b/apps/api/src/controllers/v0/crawlPreview.ts @@ -15,6 +15,7 @@ import { addScrapeJob } from "../../../src/services/queue-jobs"; import { checkAndUpdateURL } from "../../../src/lib/validateUrl"; import * as Sentry from "@sentry/node"; import { fromLegacyScrapeOptions } from "../v1/types"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; export async function crawlPreviewController(req: Request, res: Response) { try { @@ -42,8 +43,7 @@ export async function crawlPreviewController(req: Request, res: Response) { if (isUrlBlocked(url)) { return res.status(403).json({ - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + error: BLOCKLISTED_URL_MESSAGE, }); } diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts index 2d8acf5f..05bf364b 100644 --- a/apps/api/src/controllers/v0/scrape.ts +++ b/apps/api/src/controllers/v0/scrape.ts @@ -29,6 +29,7 @@ import { getJobPriority } from "../../lib/job-priority"; import { fromLegacyScrapeOptions } from "../v1/types"; import { ZodError } from "zod"; import { Document as V0Document } from "./../../lib/entities"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; export async function scrapeHelper( jobId: string, @@ -53,8 +54,7 @@ export async function scrapeHelper( if (isUrlBlocked(url)) { return { success: false, - error: - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + error: BLOCKLISTED_URL_MESSAGE, returnCode: 403, }; } @@ -265,13 +265,15 @@ export async function scrapeController(req: Request, res: Response) { } if (creditsToBeBilled > 0) { // billing for doc done on queue end, bill only for llm extraction - billTeam(team_id, chunk?.sub_id, creditsToBeBilled, logger).catch((error) => { - logger.error( - `Failed to bill team ${team_id} for ${creditsToBeBilled} credits`, - { error } - ); - // Optionally, you could notify an admin or add to a retry queue here - }); + billTeam(team_id, chunk?.sub_id, creditsToBeBilled, logger).catch( + (error) => { + logger.error( + `Failed to bill team ${team_id} for ${creditsToBeBilled} credits`, + { error }, + ); + // Optionally, you could notify an admin or add to a retry queue here + }, + ); } } diff --git a/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts b/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts index b455e5ab..afa44e58 100644 --- a/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts +++ b/apps/api/src/controllers/v1/__tests__/urlValidation.test.ts @@ -1,4 +1,5 @@ import { url } from "../types"; +import { BLOCKLISTED_URL_MESSAGE } from "../../../lib/strings"; describe("URL Schema Validation", () => { beforeEach(() => { @@ -31,7 +32,7 @@ describe("URL Schema Validation", () => { it("should reject blocked URLs", () => { expect(() => url.parse("https://facebook.com")).toThrow( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + BLOCKLISTED_URL_MESSAGE, ); }); @@ -47,16 +48,16 @@ describe("URL Schema Validation", () => { it("should handle URLs with subdomains that are blocked", () => { expect(() => url.parse("https://sub.facebook.com")).toThrow( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + BLOCKLISTED_URL_MESSAGE, ); }); it("should handle URLs with paths that are blocked", () => { expect(() => url.parse("http://facebook.com/path")).toThrow( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + BLOCKLISTED_URL_MESSAGE, ); expect(() => url.parse("https://facebook.com/another/path")).toThrow( - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", + BLOCKLISTED_URL_MESSAGE, ); }); diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 1188bafb..58e75751 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -92,19 +92,18 @@ export async function extractController( let mappedLinks = mapResults.mapResults as MapDocument[]; // Remove duplicates between mapResults.links and mappedLinks - const allUrls = [...mappedLinks.map(m => m.url), ...mapResults.links]; + const allUrls = [...mappedLinks.map((m) => m.url), ...mapResults.links]; const uniqueUrls = removeDuplicateUrls(allUrls); - + // Only add URLs from mapResults.links that aren't already in mappedLinks - const existingUrls = new Set(mappedLinks.map(m => m.url)); - const newUrls = uniqueUrls.filter(url => !existingUrls.has(url)); - + const existingUrls = new Set(mappedLinks.map((m) => m.url)); + const newUrls = uniqueUrls.filter((url) => !existingUrls.has(url)); + mappedLinks = [ ...mappedLinks, - ...newUrls.map(url => ({ url, title: "", description: "" })) + ...newUrls.map((url) => ({ url, title: "", description: "" })), ]; - if (mappedLinks.length === 0) { mappedLinks = [{ url: baseUrl, title: "", description: "" }]; } @@ -117,7 +116,6 @@ export async function extractController( `url: ${x.url}, title: ${x.title}, description: ${x.description}`, ); - if (req.body.prompt) { let searchQuery = req.body.prompt && allowExternalLinks diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 06605eb9..114c115e 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -11,6 +11,7 @@ import { Document as V0Document, } from "../../lib/entities"; import { InternalOptions } from "../../scraper/scrapeURL"; +import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings"; export type Format = | "markdown" @@ -44,10 +45,7 @@ export const url = z.preprocess( return false; } }, "Invalid URL") - .refine( - (x) => !isUrlBlocked(x as string), - "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", - ), + .refine((x) => !isUrlBlocked(x as string), BLOCKLISTED_URL_MESSAGE), ); const strictMessage = diff --git a/apps/api/src/lib/strings.ts b/apps/api/src/lib/strings.ts new file mode 100644 index 00000000..4e278d2b --- /dev/null +++ b/apps/api/src/lib/strings.ts @@ -0,0 +1,2 @@ +export const BLOCKLISTED_URL_MESSAGE = + "This website is no longer supported, please reach out to help@firecrawl.com for more info on how to activate it on your account."; diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts index c50ab9c9..0f3b8524 100644 --- a/apps/api/src/main/runWebScraper.ts +++ b/apps/api/src/main/runWebScraper.ts @@ -71,7 +71,7 @@ export async function runWebScraper({ module: "runWebscraper", scrapeId: bull_job_id, jobId: bull_job_id, - }) + }); const tries = is_crawl ? 3 : 1; let response: ScrapeUrlResponse | undefined = undefined; @@ -176,7 +176,7 @@ export async function runWebScraper({ billTeam(team_id, undefined, creditsToBeBilled, logger).catch((error) => { logger.error( `Failed to bill team ${team_id} for ${creditsToBeBilled} credits`, - { error } + { error }, ); // Optionally, you could notify an admin or add to a retry queue here }); diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index 76427114..1ee191ef 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -32,6 +32,7 @@ import { extractController } from "../controllers/v1/extract"; // import { livenessController } from "../controllers/v1/liveness"; // import { readinessController } from "../controllers/v1/readiness"; import { creditUsageController } from "../controllers/v1/credit-usage"; +import { BLOCKLISTED_URL_MESSAGE } from "../lib/strings"; function checkCreditsMiddleware( minimum?: number, @@ -123,8 +124,7 @@ function blocklistMiddleware(req: Request, res: Response, next: NextFunction) { if (!res.headersSent) { return res.status(403).json({ success: false, - error: - "URL is blocked intentionally. Firecrawl currently does not support scraping this site due to policy restrictions.", + error: BLOCKLISTED_URL_MESSAGE, }); } } @@ -231,4 +231,3 @@ v1Router.get( authMiddleware(RateLimiterMode.CrawlStatus), wrap(creditUsageController), ); - diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts index c2671034..5eb541fd 100644 --- a/apps/api/src/services/billing/credit_billing.ts +++ b/apps/api/src/services/billing/credit_billing.ts @@ -44,7 +44,10 @@ export async function supaBillTeam( if (team_id === "preview") { return { success: true, message: "Preview team, no credits used" }; } - _logger.info(`Billing team ${team_id} for ${credits} credits`, { team_id, credits }); + _logger.info(`Billing team ${team_id} for ${credits} credits`, { + team_id, + credits, + }); const { data, error } = await supabase_service.rpc("bill_team", { _team_id: team_id, diff --git a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts index dea55846..81b0a523 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/v1/e2e_withAuth/index.test.ts @@ -159,12 +159,6 @@ describe('FirecrawlApp E2E Tests', () => { await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401"); }); - test.concurrent('should throw error for blocklisted URL on crawl', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const blocklistedUrl = "https://twitter.com/fake-test"; - await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions."); - }); - test.concurrent('should return successful response for crawl and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse; diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 8945d74d..50d5306f 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -29,12 +29,12 @@ def test_scrape_url_invalid_api_key(): invalid_app.scrape_url('https://firecrawl.dev') assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) -def test_blocklisted_url(): - blocklisted_url = "https://facebook.com/fake-test" - app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0') - with pytest.raises(Exception) as excinfo: - app.scrape_url(blocklisted_url) - assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) +# def test_blocklisted_url(): +# blocklisted_url = "https://facebook.com/fake-test" +# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0') +# with pytest.raises(Exception) as excinfo: +# app.scrape_url(blocklisted_url) +# assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) def test_successful_response_with_valid_preview_token(): app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token", version='v0') @@ -90,12 +90,12 @@ def test_crawl_url_invalid_api_key(): invalid_app.crawl_url('https://firecrawl.dev') assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value) -def test_should_return_error_for_blocklisted_url(): - app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0') - blocklisted_url = "https://twitter.com/fake-test" - with pytest.raises(Exception) as excinfo: - app.crawl_url(blocklisted_url) - assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) +# def test_should_return_error_for_blocklisted_url(): +# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0') +# blocklisted_url = "https://twitter.com/fake-test" +# with pytest.raises(Exception) as excinfo: +# app.crawl_url(blocklisted_url) +# assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value) def test_crawl_url_wait_for_completion_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY, version='v0') diff --git a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py index 12fa10ce..0ada6c1d 100644 --- a/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/v1/e2e_withAuth/test.py @@ -30,12 +30,12 @@ def test_scrape_url_invalid_api_key(): invalid_app.scrape_url('https://firecrawl.dev') assert "Unauthorized: Invalid token" in str(excinfo.value) -def test_blocklisted_url(): - blocklisted_url = "https://facebook.com/fake-test" - app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) - with pytest.raises(Exception) as excinfo: - app.scrape_url(blocklisted_url) - assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) +# def test_blocklisted_url(): +# blocklisted_url = "https://facebook.com/fake-test" +# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) +# with pytest.raises(Exception) as excinfo: +# app.scrape_url(blocklisted_url) +# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) def test_successful_response_with_valid_preview_token(): app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") @@ -136,12 +136,12 @@ def test_crawl_url_invalid_api_key(): invalid_app.crawl_url('https://firecrawl.dev') assert "Unauthorized: Invalid token" in str(excinfo.value) -def test_should_return_error_for_blocklisted_url(): - app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) - blocklisted_url = "https://twitter.com/fake-test" - with pytest.raises(Exception) as excinfo: - app.crawl_url(blocklisted_url) - assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) +# def test_should_return_error_for_blocklisted_url(): +# app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) +# blocklisted_url = "https://twitter.com/fake-test" +# with pytest.raises(Exception) as excinfo: +# app.crawl_url(blocklisted_url) +# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) def test_crawl_url_wait_for_completion_e2e(): app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) @@ -296,12 +296,12 @@ def test_invalid_api_key_on_map(): invalid_app.map_url('https://roastmywebsite.ai') assert "Unauthorized: Invalid token" in str(excinfo.value) -def test_blocklisted_url_on_map(): - app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL) - blocklisted_url = "https://facebook.com/fake-test" - with pytest.raises(Exception) as excinfo: - app.map_url(blocklisted_url) - assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) +# def test_blocklisted_url_on_map(): +# app = FirecrawlApp(api_key=TEST_API_KEY, api_url=API_URL) +# blocklisted_url = "https://facebook.com/fake-test" +# with pytest.raises(Exception) as excinfo: +# app.map_url(blocklisted_url) +# assert "URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions." in str(excinfo.value) def test_successful_response_with_valid_preview_token_on_map(): app = FirecrawlApp(api_key="this_is_just_a_preview_token", api_url=API_URL) diff --git a/apps/rust-sdk/tests/e2e_with_auth.rs b/apps/rust-sdk/tests/e2e_with_auth.rs index 75568f92..92b202cb 100644 --- a/apps/rust-sdk/tests/e2e_with_auth.rs +++ b/apps/rust-sdk/tests/e2e_with_auth.rs @@ -5,20 +5,20 @@ use firecrawl::FirecrawlApp; use serde_json::json; use std::env; -#[tokio::test] -async fn test_blocklisted_url() { - dotenv().ok(); - let api_url = env::var("API_URL").unwrap(); - let api_key = env::var("TEST_API_KEY").ok(); - let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap(); - let blocklisted_url = "https://facebook.com/fake-test"; - let result = app.scrape_url(blocklisted_url, None).await; +// #[tokio::test] +// async fn test_blocklisted_url() { +// dotenv().ok(); +// let api_url = env::var("API_URL").unwrap(); +// let api_key = env::var("TEST_API_KEY").ok(); +// let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap(); +// let blocklisted_url = "https://facebook.com/fake-test"; +// let result = app.scrape_url(blocklisted_url, None).await; - assert_matches!( - result, - Err(e) if e.to_string().contains("Firecrawl currently does not support social media scraping due to policy restrictions") - ); -} +// assert_matches!( +// result, +// Err(e) if e.to_string().contains("Firecrawl currently does not support social media scraping due to policy restrictions") +// ); +// } #[tokio::test] async fn test_successful_response_with_valid_preview_token() { @@ -103,20 +103,21 @@ async fn test_successful_response_for_valid_scrape_with_pdf_file_without_explici .contains("We present spectrophotometric observations of the Broad Line Radio Galaxy")); } -#[tokio::test] -async fn test_should_return_error_for_blocklisted_url() { - dotenv().ok(); - let api_url = env::var("API_URL").unwrap(); - let api_key = env::var("TEST_API_KEY").ok(); - let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap(); - let blocklisted_url = "https://twitter.com/fake-test"; - let result = app.crawl_url(blocklisted_url, None).await; - assert_matches!( - result, - Err(e) if e.to_string().contains("Firecrawl currently does not support social media scraping due to policy restrictions.") - ); -} +// #[tokio::test] +// async fn test_should_return_error_for_blocklisted_url() { +// dotenv().ok(); +// let api_url = env::var("API_URL").unwrap(); +// let api_key = env::var("TEST_API_KEY").ok(); +// let app = FirecrawlApp::new_selfhosted(api_url, api_key).unwrap(); +// let blocklisted_url = "https://twitter.com/fake-test"; +// let result = app.crawl_url(blocklisted_url, None).await; + +// assert_matches!( +// result, +// Err(e) if e.to_string().contains("Firecrawl currently does not support social media scraping due to policy restrictions.") +// ); +// } #[tokio::test] async fn test_llm_extraction() {