Mirror of https://github.com/mendableai/firecrawl
(via https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl, synced 2025-08-13 08:49:01 +08:00)
Commit: Merge branch 'main' into v1-webscraper
This commit is contained in:
commit
b0d211ecc1
@ -336,6 +336,8 @@ function getPlanByPriceId(price_id: string) {
|
||||
case process.env.STRIPE_PRICE_ID_GROWTH:
|
||||
case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
|
||||
return "growth";
|
||||
case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY:
|
||||
return "growthdouble";
|
||||
default:
|
||||
return "free";
|
||||
}
|
||||
|
@ -45,6 +45,16 @@ export async function isCrawlFinished(id: string) {
|
||||
return (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs"));
|
||||
}
|
||||
|
||||
export async function finishCrawl(id: string) {
|
||||
if (await isCrawlFinished(id)) {
|
||||
const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes");
|
||||
if (set === 1) {
|
||||
await redisConnection.expire("crawl:" + id + ":finish", 24 * 60 * 60);
|
||||
}
|
||||
return set === 1
|
||||
}
|
||||
}
|
||||
|
||||
export async function getCrawlJobs(id: string): Promise<string[]> {
|
||||
return await redisConnection.smembers("crawl:" + id + ":jobs");
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ import { Logger } from "../lib/logger";
|
||||
import { Worker } from "bullmq";
|
||||
import systemMonitor from "./system-monitor";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { addCrawlJob, addCrawlJobDone, crawlToCrawler, getCrawl, getCrawlJobs, isCrawlFinished, lockURL } from "../lib/crawl-redis";
|
||||
import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, getCrawlJobs, lockURL } from "../lib/crawl-redis";
|
||||
import { StoredCrawl } from "../lib/crawl-redis";
|
||||
import { addScrapeJob } from "./queue-jobs";
|
||||
import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
|
||||
@ -174,7 +174,7 @@ async function processJob(job: Job, token: string) {
|
||||
if (!sc.cancelled) {
|
||||
const crawler = crawlToCrawler(job.data.crawl_id, sc);
|
||||
|
||||
const links = crawler.filterLinks((data.docs[0].linksOnPage as string[])
|
||||
const links = crawler.filterLinks((data.docs[0].linksOnPage ?? [])
|
||||
.map(href => crawler.filterURL(href.trim(), sc.originUrl))
|
||||
.filter(x => x !== null),
|
||||
Infinity,
|
||||
@ -199,7 +199,7 @@ async function processJob(job: Job, token: string) {
|
||||
}
|
||||
}
|
||||
|
||||
if (await isCrawlFinished(job.data.crawl_id)) {
|
||||
if (await finishCrawl(job.data.crawl_id)) {
|
||||
const jobIDs = await getCrawlJobs(job.data.crawl_id);
|
||||
|
||||
const jobs = (await Promise.all(jobIDs.map(async x => {
|
||||
@ -226,14 +226,14 @@ async function processJob(job: Job, token: string) {
|
||||
return j;
|
||||
}))).sort((a, b) => a.timestamp - b.timestamp);
|
||||
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
|
||||
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
|
||||
const jobStatus = sc.cancelled || jobStatuses.some(x => x === "failed") ? "failed" : "completed";
|
||||
|
||||
const fullDocs = jobs.map(x => Array.isArray(x.returnvalue) ? x.returnvalue[0] : x.returnvalue);
|
||||
|
||||
await logJob({
|
||||
job_id: job.data.crawl_id,
|
||||
success: jobStatus === "completed",
|
||||
message: message,
|
||||
message: sc.cancelled ? "Cancelled" : message,
|
||||
num_docs: fullDocs.length,
|
||||
docs: [],
|
||||
time_taken: (Date.now() - sc.createdAt) / 1000,
|
||||
@ -260,7 +260,7 @@ async function processJob(job: Job, token: string) {
|
||||
docs: fullDocs,
|
||||
};
|
||||
|
||||
await callWebhook(job.data.team_id, job.id as string, data);
|
||||
await callWebhook(job.data.team_id, job.data.crawl_id, data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@ const RATE_LIMITS = {
|
||||
standardNew: 10,
|
||||
standardnew: 10,
|
||||
growth: 50,
|
||||
growthdouble: 50,
|
||||
},
|
||||
scrape: {
|
||||
default: 20,
|
||||
@ -26,6 +27,7 @@ const RATE_LIMITS = {
|
||||
standardNew: 50,
|
||||
standardnew: 50,
|
||||
growth: 500,
|
||||
growthdouble: 500,
|
||||
},
|
||||
search: {
|
||||
default: 20,
|
||||
@ -38,6 +40,7 @@ const RATE_LIMITS = {
|
||||
standardNew: 50,
|
||||
standardnew: 50,
|
||||
growth: 500,
|
||||
growthdouble: 500,
|
||||
},
|
||||
preview: {
|
||||
free: 5,
|
||||
|
Loading…
x
Reference in New Issue
Block a user