Nicolas 2024-08-21 22:20:40 -03:00
parent 6bdb1d045d
commit c7bfe4ffe8
8 changed files with 37 additions and 13 deletions

View File

@@ -25,10 +25,11 @@ import {
 } from "../../src/lib/crawl-redis";
 import { getScrapeQueue } from "../../src/services/queue-service";
 import { checkAndUpdateURL } from "../../src/lib/validateUrl";
+import { getJobPriority } from "../../src/lib/job-priority";

 export async function crawlController(req: Request, res: Response) {
   try {
-    const { success, team_id, error, status } = await authenticateUser(
+    const { success, team_id, error, status, plan } = await authenticateUser(
       req,
       res,
       RateLimiterMode.Crawl
@@ -126,6 +127,7 @@ export async function crawlController(req: Request, res: Response) {
     crawlerOptions,
     pageOptions,
     team_id,
+    plan,
     createdAt: Date.now(),
   };
@@ -175,6 +177,10 @@ export async function crawlController(req: Request, res: Response) {
       await getScrapeQueue().addBulk(jobs);
     } else {
       await lockURL(id, sc, url);
+
+      // Not needed, first one should be 15.
+      // const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})
+
       const job = await addScrapeJob(
         {
           url,
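Note: the thread of this commit starts here: authenticateUser now also returns the team's plan, and the controller persists it on the crawl record; everything else in the commit consumes that field. A condensed sketch of the flow (names taken from this diff; saveCrawl is assumed from crawl-redis and is not shown in these hunks):

const { success, team_id, error, status, plan } = await authenticateUser(
  req,
  res,
  RateLimiterMode.Crawl
);

const sc: StoredCrawl = {
  crawlerOptions,
  pageOptions,
  team_id,
  plan, // new: read back later as sc.plan by the queue worker
  createdAt: Date.now(),
};
await saveCrawl(id, sc); // assumed helper from crawl-redis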

View File

@@ -10,7 +10,7 @@ import { checkAndUpdateURL } from "../../src/lib/validateUrl";

 export async function crawlPreviewController(req: Request, res: Response) {
   try {
-    const { success, error, status } = await authenticateUser(
+    const { success, error, status, team_id:a, plan } = await authenticateUser(
       req,
       res,
       RateLimiterMode.Preview
@@ -88,6 +88,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
     crawlerOptions,
     pageOptions,
     team_id,
+    plan,
     robots,
     createdAt: Date.now(),
   };
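Note: the authenticated team id is destructured under the throwaway name `a`, presumably because the preview path keeps its own local `team_id` (a placeholder team) for the stored crawl above; only `plan` is actually consumed. If that reading is right, a more self-documenting spelling would be:

// Hypothetical rename for clarity; `_authTeamId` is not in the commit.
const { success, error, status, team_id: _authTeamId, plan } = await authenticateUser(
  req,
  res,
  RateLimiterMode.Preview
);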

View File

@@ -38,7 +38,7 @@ export async function scrapeHelper(
     return { success: false, error: "Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.", returnCode: 403 };
   }

-  const jobPriority = await getJobPriority({plan, team_id})
+  const jobPriority = await getJobPriority({plan, team_id, basePriority: 10})

   const job = await addScrapeJob({
     url,
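Note: single scrapes now pass an explicit basePriority of 10 instead of relying on the default inside getJobPriority. In BullMQ a lower priority number is served first, so 10 marks the "normal" tier that the plan/bucket logic can only degrade. A usage sketch (the return value is illustrative, assuming the team's Redis set is under its bucket limit; the function's return path sits outside this diff's hunks):

// For a team with few concurrent jobs, the bucket is not exceeded and the
// base priority is returned unchanged (assumed behavior, see note above).
const jobPriority = await getJobPriority({ plan: "hobby", team_id, basePriority: 10 });
// jobPriority === 10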

View File

@@ -213,7 +213,7 @@ async function sendScrapeRequests() {
   await Promise.all(requests);
 }

-sendScrapeRequests();
+// sendScrapeRequests();

 // const sq = getScrapeQueue();
 // sq.on("waiting", j => ScrapeEvents.logJobEvent(j, "waiting"));

View File

@@ -6,6 +6,7 @@ export type StoredCrawl = {
   crawlerOptions: any;
   pageOptions: any;
   team_id: string;
+  plan: string;
   robots?: string;
   cancelled?: boolean;
   createdAt: number;
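Note: plan is typed as a plain string here while getJobPriority expects a PlanType, which is why the queue worker below has to cast (`sc.plan as PlanType`). Typing the field with the narrower union would remove the cast; a sketch of that alternative (not what the commit does):

import { PlanType } from "../types";

export type StoredCrawl = {
  crawlerOptions: any;
  pageOptions: any;
  team_id: string;
  plan: PlanType; // narrower than string; avoids the `as PlanType` cast in queue-worker
  robots?: string;
  cancelled?: boolean;
  createdAt: number;
};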

View File

@@ -30,9 +30,11 @@ export async function deleteJobPriority(team_id, job_id) {
 export async function getJobPriority({
   plan,
   team_id,
+  basePriority = 10
 }: {
   plan: PlanType;
   team_id: string;
+  basePriority: number;
 }): Promise<number> {
   const setKey = SET_KEY_PREFIX + team_id;
@@ -40,11 +42,18 @@ export async function getJobPriority({
   const setLength = await redisConnection.scard(setKey);

   // Determine the priority based on the plan and set length
-  let basePriority = 10;
   let planModifier = 1;
   let bucketLimit = 0;

   switch (plan) {
+    case "free":
+      bucketLimit = 25;
+      planModifier = 1;
+      break;
+    case "hobby":
+      bucketLimit = 50;
+      planModifier = 0.5;
+      break;
     case "standard":
     case "standardnew":
       bucketLimit = 100;
@@ -55,11 +64,8 @@ export async function getJobPriority({
       bucketLimit = 200;
       planModifier = 0.2;
       break;
-    case "hobby":
-      bucketLimit = 50;
-      planModifier = 0.5;
-      break;
-    case "free":
     default:
       bucketLimit = 25;
       planModifier = 1;
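Note: this is the core change. basePriority is promoted from a local variable to a caller-supplied parameter (defaulting to 10), and the free/hobby cases move above standard so the switch reads in ascending plan order; the switch's behavior is unchanged. One type nit: since the destructured parameter has a default, the annotation should arguably be `basePriority?: number`, otherwise the type forces every caller to pass it. The return computation falls outside these hunks, so the sketch below fills it in with an assumed formula purely for illustration:

// Sketch of the reworked function. The signature and switch mirror this
// commit's hunks; the final return computation is NOT visible in the diff,
// so the tail below is an illustrative assumption, not the repo's exact code.
export async function getJobPriority({
  plan,
  team_id,
  basePriority = 10,
}: {
  plan: PlanType;
  team_id: string;
  basePriority?: number; // `?` added here; the commit annotates it as required
}): Promise<number> {
  const setKey = SET_KEY_PREFIX + team_id;
  const setLength = await redisConnection.scard(setKey);

  let planModifier = 1;
  let bucketLimit = 0;

  switch (plan) {
    case "free":
      bucketLimit = 25;
      planModifier = 1;
      break;
    case "hobby":
      bucketLimit = 50;
      planModifier = 0.5;
      break;
    case "standard":
    case "standardnew":
      bucketLimit = 100;
      planModifier = 0.3; // this tier's modifier falls between hunks; illustrative
      break;
    // ...higher tiers (bucketLimit = 200, planModifier = 0.2) omitted here
    default:
      bucketLimit = 25;
      planModifier = 1;
  }

  // Assumed tail: within the bucket, keep the caller's base priority; past
  // the limit, degrade it in proportion to the plan modifier. In BullMQ a
  // larger priority number means the job is served later.
  if (setLength <= bucketLimit) return basePriority;
  return Math.ceil(basePriority + (setLength - bucketLimit) * planModifier);
}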

View File

@@ -10,7 +10,7 @@ export async function addScrapeJob(
   jobPriority: number = 10
 ): Promise<Job> {
   return await getScrapeQueue().add(jobId, webScraperOptions, {
-    priority: webScraperOptions.crawl_id ? 20 : jobPriority,
+    priority: jobPriority,
     ...options,
     jobId,
   });
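Note: addScrapeJob no longer hard-codes priority 20 for crawl-spawned jobs; the `crawl_id ? 20 : 10` decision moves to the caller (see queue-worker below, which passes it to getJobPriority as basePriority so the plan logic can adjust it). Call shape after this commit, inferred from this hunk and the worker's call site:

// Inferred signature; the first three parameters sit above this hunk.
// addScrapeJob(webScraperOptions, options, jobId, jobPriority = 10)
const newJob = await addScrapeJob(
  { url, mode: "single_urls", team_id, crawlerOptions, pageOptions },
  {},          // extra BullMQ options, spread into the add() call
  jobId,       // explicit id so the caller can track the job
  jobPriority  // fully caller-controlled now
);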

View File

@@ -21,7 +21,8 @@ import { addCrawlJob, addCrawlJobDone, crawlToCrawler, finishCrawl, getCrawl, ge
 import { StoredCrawl } from "../lib/crawl-redis";
 import { addScrapeJob } from "./queue-jobs";
 import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
-import { addJobPriority, deleteJobPriority } from "../../src/lib/job-priority";
+import { addJobPriority, deleteJobPriority, getJobPriority } from "../../src/lib/job-priority";
+import { PlanType } from "../types";

 if (process.env.ENV === "production") {
   initSDK({
@@ -216,6 +217,15 @@ async function processJob(job: Job, token: string) {
       for (const link of links) {
         if (await lockURL(job.data.crawl_id, sc, link)) {
+          const jobPriority = await getJobPriority({plan:sc.plan as PlanType, team_id: sc.team_id, basePriority: job.data.crawl_id ? 20 : 10})
+
+          const jobId = uuidv4();
+
+          console.log("plan: ", sc.plan);
+          console.log("team_id: ", sc.team_id)
+          console.log("base priority: ", job.data.crawl_id ? 20 : 10)
+          console.log("job priority: ", jobPriority, "\n\n\n")
+
           const newJob = await addScrapeJob({
             url: link,
             mode: "single_urls",
@@ -224,7 +234,7 @@ async function processJob(job: Job, token: string) {
             pageOptions: sc.pageOptions,
             origin: job.data.origin,
             crawl_id: job.data.crawl_id,
-          });
+          }, {}, jobId, jobPriority);

           await addCrawlJob(job.data.crawl_id, newJob.id);
         }
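Note: this is where the stored plan pays off. For every link discovered during a crawl, the worker computes a per-team, per-plan priority (base 20 for crawl-spawned jobs, 10 otherwise) and hands addScrapeJob an explicit jobId and priority. The four console.log calls read like leftover debug output and would be noisy at crawl scale; the same trace behind a guard might look like this (DEBUG_JOB_PRIORITY is a hypothetical flag, not part of the commit):

// Hypothetical debug guard; keeps the trace but silences it by default.
if (process.env.DEBUG_JOB_PRIORITY) {
  console.log(
    `plan=${sc.plan} team_id=${sc.team_id} ` +
    `base=${job.data.crawl_id ? 20 : 10} priority=${jobPriority}`
  );
}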