mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-12 20:29:01 +08:00
Update search.ts
This commit is contained in:
parent
e37ab8431a
commit
cbe0716439
@ -18,6 +18,7 @@ import { getScrapeQueue } from "../../services/queue-service";
|
|||||||
import { search } from "../../search";
|
import { search } from "../../search";
|
||||||
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
|
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
|
||||||
import * as Sentry from "@sentry/node";
|
import * as Sentry from "@sentry/node";
|
||||||
|
import { BLOCKLISTED_URL_MESSAGE } from "../../lib/strings";
|
||||||
|
|
||||||
async function scrapeSearchResult(
|
async function scrapeSearchResult(
|
||||||
searchResult: { url: string; title: string; description: string },
|
searchResult: { url: string; title: string; description: string },
|
||||||
@ -37,6 +38,9 @@ async function scrapeSearchResult(
|
|||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
if (isUrlBlocked(searchResult.url)) {
|
||||||
|
throw new Error("Could not scrape url: " + BLOCKLISTED_URL_MESSAGE);
|
||||||
|
}
|
||||||
await addScrapeJob(
|
await addScrapeJob(
|
||||||
{
|
{
|
||||||
url: searchResult.url,
|
url: searchResult.url,
|
||||||
@ -75,9 +79,6 @@ async function scrapeSearchResult(
|
|||||||
description: searchResult.description,
|
description: searchResult.description,
|
||||||
url: searchResult.url,
|
url: searchResult.url,
|
||||||
metadata: {
|
metadata: {
|
||||||
title: searchResult.title,
|
|
||||||
description: searchResult.description,
|
|
||||||
sourceURL: searchResult.url,
|
|
||||||
statusCode: 0,
|
statusCode: 0,
|
||||||
error: error.message,
|
error: error.message,
|
||||||
},
|
},
|
||||||
@ -145,9 +146,6 @@ export async function searchController(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter out blocked URLs before scraping
|
|
||||||
searchResults = searchResults.filter((r) => !isUrlBlocked(r.url));
|
|
||||||
|
|
||||||
// Scrape each non-blocked result, handling timeouts individually
|
// Scrape each non-blocked result, handling timeouts individually
|
||||||
const scrapePromises = searchResults.map((result) =>
|
const scrapePromises = searchResults.map((result) =>
|
||||||
scrapeSearchResult(result, {
|
scrapeSearchResult(result, {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user