commit c7bfe4ffe8
parent 6bdb1d045d
Nicolas, 2024-08-21 22:20:40 -03:00
8 changed files with 37 additions and 13 deletions

View File

@@ -25,10 +25,11 @@ import {
 } from "../../src/lib/crawl-redis";
 import { getScrapeQueue } from "../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../src/lib/validateUrl";
+import { getJobPriority } from "../../src/lib/job-priority";

 export async function crawlController(req: Request, res: Response) {
   try {
-    const { success, team_id, error, status } = await authenticateUser(
+    const { success, team_id, error, status, plan } = await authenticateUser(
       req,
       res,
       RateLimiterMode.Crawl
@@ -126,6 +127,7 @@ export async function crawlController(req: Request, res: Response) {
       crawlerOptions,
       pageOptions,
       team_id,
+      plan,
       createdAt: Date.now(),
     };
@@ -175,6 +177,10 @@ export async function crawlController(req: Request, res: Response) {
       await getScrapeQueue().addBulk(jobs);
     } else {
       await lockURL(id, sc, url);
+      // Not needed, first one should be 15.
+      // const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})
       const job = await addScrapeJob(
         {
           url,

View File

@@ -10,7 +10,7 @@ import { checkAndUpdateURL } from "../../src/lib/validateUrl";

 export async function crawlPreviewController(req: Request, res: Response) {
   try {
-    const { success, error, status } = await authenticateUser(
+    const { success, error, status, team_id:a, plan } = await authenticateUser(
       req,
       res,
       RateLimiterMode.Preview
@@ -88,6 +88,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
       crawlerOptions,
       pageOptions,
       team_id,
+      plan,
       robots,
       createdAt: Date.now(),
     };

View File

@@ -38,7 +38,7 @@ export async function scrapeHelper(
     return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
   }

-  const jobPriority = await getJobPriority({plan, team_id})
+  const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})

   const job = await addScrapeJob({
     url,

View File

@@ -213,7 +213,7 @@ async function sendScrapeRequests() {
   await Promise.all(requests);
 }

-sendScrapeRequests();
+// sendScrapeRequests();

 // const sq = getScrapeQueue();
 // sq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));

View File

@@ -6,6 +6,7 @@ export type StoredCrawl = {
   crawlerOptions: any;
   pageOptions: any;
   team_id: string;
+  plan: string;
   robots?: string;
   cancelled?: boolean;
   createdAt: number;

View File

@@ -30,9 +30,11 @@ export async function deleteJobPriority(team_id, job_id) {
 export async function getJobPriority({
   plan,
   team_id,
+  basePriority = 10
 }: {
   plan: PlanType;
   team_id: string;
+  basePriority: number;
 }): Promise<number> {
   const setKey = SET_KEY_PREFIX + team_id;
@@ -40,11 +42,18 @@ export async function getJobPriority({
   const setLength = await redisConnection.scard(setKey);

   // Determine the priority based on the plan and set length
-  let basePriority = 10;
   let planModifier = 1;
   let bucketLimit = 0;

   switch (plan) {
+    case "free":
+      bucketLimit = 25;
+      planModifier = 1;
+      break;
+    case "hobby":
+      bucketLimit = 50;
+      planModifier = 0.5;
+      break;
     case "standard":
     case "standardnew":
       bucketLimit = 100;
@@ -55,11 +64,8 @@ export async function getJobPriority({
       bucketLimit = 200;
       planModifier = 0.2;
       break;
-    case "hobby":
-      bucketLimit = 50;
-      planModifier = 0.5;
-      break;
-    case "free":
     default:
       bucketLimit = 25;
       planModifier = 1;
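
Note: this hunk only reorders the per-plan constants and turns basePriority into a parameter; the code that combines basePriority, setLength, bucketLimit, and planModifier into the returned priority lies outside the diff. A minimal sketch of one plausible combination, assuming queue semantics where a lower priority number is served first (the scaling formula is an assumption, not the file's actual code):

// Hypothetical sketch, not the repository's implementation.
// Under the bucket limit a team keeps the base priority; over it,
// priority degrades with the overflow, scaled by the plan modifier.
function computePriority(
  basePriority: number,
  setLength: number,    // current jobs in the team's Redis set
  bucketLimit: number,  // per-plan threshold from the switch above
  planModifier: number  // per-plan scaling from the switch above
): number {
  if (setLength <= bucketLimit) {
    return basePriority;
  }
  // Larger number = lower urgency.
  return Math.ceil(basePriority + (setLength - bucketLimit) * planModifier);
}

// Example: a free-plan team (bucketLimit 25, planModifier 1) with 40
// queued jobs and basePriority 10 would get priority 10 + 15 = 25.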

View File

@@ -10,7 +10,7 @@ export async function addScrapeJob(
   jobPriority: number = 10
 ): Promise<Job> {
   return await getScrapeQueue().add(jobId, webScraperOptions, {
-    priority: webScraperOptions.crawl_id ? 20 : jobPriority,
+    priority: jobPriority,
     ...options,
     jobId,
  });
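
Note: before this change, addScrapeJob silently overrode the caller's priority with a hardcoded 20 for any job carrying a crawl_id; now callers compute the priority via getJobPriority and the queue uses it as-is. A hedged sketch of the resulting call pattern, with illustrative values (the plan, team id, URL, and import paths below are assumptions; the positional signature matches the queue-worker call further down):

// Illustrative only; values and import paths are assumptions.
import { getJobPriority } from "../lib/job-priority";
import { addScrapeJob } from "./queue-jobs";
import { v4 as uuidv4 } from "uuid";

async function enqueueExample() {
  const plan = "hobby";        // assumed plan
  const team_id = "team-123";  // assumed team id

  // Standalone scrapes start from basePriority 10; crawl sub-jobs from 20.
  const jobPriority = await getJobPriority({ plan, team_id, basePriority: 10 });

  // addScrapeJob(webScraperOptions, options, jobId, jobPriority)
  return addScrapeJob(
    { url: "https://example.com", mode: "single_urls", team_id, plan },
    {},
    uuidv4(),
    jobPriority
  );
}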

View File

@@ -21,7 +21,8 @@ import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, ge
 import { StoredCrawl } from "../lib/crawl-redis";
 import { addScrapeJob } from "./queue-jobs";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
-import { addJobPriority, deleteJobPriority } from "../../src/lib/job-priority";
+import { addJobPriority, deleteJobPriority, getJobPriority } from "../../src/lib/job-priority";
+import { PlanType } from "../types";

 if (process.env.ENV === "production") {
   initSDK({
@@ -216,6 +217,15 @@ async function processJob(job: Job, token: string) {
       for (const link of links) {
         if (await lockURL(job.data.crawl_id, sc, link)) {
+          const jobPriority = await getJobPriority({plan:sc.plan as PlanType, team_id: sc.team_id, basePriority: job.data.crawl_id ? 20 : 10})
+          const jobId = uuidv4();
+
+          console.log("plan: ", sc.plan);
+          console.log("team_id: ", sc.team_id)
+          console.log("base priority: ", job.data.crawl_id ? 20 : 10)
+          console.log("job priority: " , jobPriority, "\n\n\n")
+
           const newJob = await addScrapeJob({
             url: link,
             mode: "single_urls",
@@ -224,7 +234,7 @@ async function processJob(job: Job, token: string) {
             pageOptions: sc.pageOptions,
             origin: job.data.origin,
             crawl_id: job.data.crawl_id,
-          });
+          }, {}, jobId, jobPriority);

           await addCrawlJob(job.data.crawl_id, newJob.id);
         }
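
Note: the basePriority of 20 for jobs carrying a crawl_id (versus 10 for direct scrapes) relies on the queue serving lower priority numbers first, so links discovered during a crawl yield to user-initiated scrapes. A self-contained illustration of that ordering, assuming BullMQ and a local Redis (queue name and payloads are made up, not Firecrawl's setup):

// Illustrative only: shows priority ordering, not Firecrawl's queue config.
import { Queue } from "bullmq";

const queue = new Queue("scrape-demo", {
  connection: { host: "localhost", port: 6379 }, // assumed local Redis
});

async function demo() {
  // Enqueued first, but served second: higher number = lower urgency.
  await queue.add("crawl-subjob", { url: "https://example.com/page" }, { priority: 20 });
  await queue.add("direct-scrape", { url: "https://example.com" }, { priority: 10 });
}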