Merge branch 'main' into feat/fire-engine-chrome-cdp

This commit is contained in:
Nicolas 2024-07-18 12:48:42 -04:00
commit f11137352c
30 changed files with 1147 additions and 439 deletions

20
.github/workflows/check-redis.yml vendored Normal file
View File

@ -0,0 +1,20 @@
# Scheduled probe of the API's Redis health endpoint.
# Runs every 5 minutes and fails the workflow run when the endpoint does not
# answer with HTTP 200.
name: Check Redis
on:
  schedule:
    - cron: '*/5 * * * *'
env:
  BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
jobs:
  # NOTE(review): the job id is kept unchanged for compatibility; the job
  # probes Redis health, it does not clean any jobs.
  clean-jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Send GET request to check queues
        run: |
          # Read the admin key from the workflow-level env var rather than
          # expanding the secret expression into the generated script text.
          response=$(curl --write-out '%{http_code}' --silent --output /dev/null --max-time 180 "https://api.firecrawl.dev/admin/${BULL_AUTH_KEY}/redis-health")
          if [ "$response" -ne 200 ]; then
            echo "Failed to check queues. Response: $response"
            exit 1
          fi
          echo "Successfully checked queues. Response: $response"

View File

@ -3,6 +3,7 @@ NUM_WORKERS_PER_QUEUE=8
PORT=3002 PORT=3002
HOST=0.0.0.0 HOST=0.0.0.0
REDIS_URL=redis://localhost:6379 REDIS_URL=redis://localhost:6379
REDIS_RATE_LIMIT_URL=redis://localhost:6379
PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/html
## To turn on DB authentication, you need to set up supabase. ## To turn on DB authentication, you need to set up supabase.

View File

@ -5,6 +5,7 @@ SUPABASE_ANON_TOKEN=
SUPABASE_URL= SUPABASE_URL=
SUPABASE_SERVICE_TOKEN= SUPABASE_SERVICE_TOKEN=
REDIS_URL= REDIS_URL=
REDIS_RATE_LIMIT_URL=
SCRAPING_BEE_API_KEY= SCRAPING_BEE_API_KEY=
OPENAI_API_KEY= OPENAI_API_KEY=
ANTHROPIC_API_KEY= ANTHROPIC_API_KEY=

View File

@ -8,9 +8,6 @@ primary_region = 'mia'
kill_signal = 'SIGINT' kill_signal = 'SIGINT'
kill_timeout = '30s' kill_timeout = '30s'
[deploy]
release_command = 'node dist/src/trigger-shutdown.js https://staging-firecrawl-scraper-js.fly.dev'
[build] [build]
[processes] [processes]

View File

@ -8,9 +8,6 @@ primary_region = 'mia'
kill_signal = 'SIGINT' kill_signal = 'SIGINT'
kill_timeout = '30s' kill_timeout = '30s'
[deploy]
release_command = 'node dist/src/trigger-shutdown.js https://api.firecrawl.dev'
[build] [build]
[processes] [processes]

View File

@ -41,14 +41,42 @@
"pageOptions": { "pageOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"headers": {
"type": "object",
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"includeHtml": {
"type": "boolean",
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false
},
"onlyIncludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
},
"onlyMainContent": { "onlyMainContent": {
"type": "boolean", "type": "boolean",
"description": "Only return the main content of the page excluding headers, navs, footers, etc.", "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"default": false "default": false
}, },
"includeHtml": { "removeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
},
"replaceAllPathsWithAbsolutePaths": {
"type": "boolean", "type": "boolean",
"description": "Include the raw HTML content of the page. Will output a html key in the response.", "description": "Replace all relative paths with absolute paths for images and links",
"default": false "default": false
}, },
"screenshot": { "screenshot": {
@ -60,49 +88,27 @@
"type": "integer", "type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content", "description": "Wait x amount of milliseconds for the page to load to fetch content",
"default": 0 "default": 0
},
"removeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
},
"onlyIncludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
},
"headers": {
"type": "object",
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"replaceAllPathsWithAbsolutePaths": {
"type": "boolean",
"description": "Replace all relative paths with absolute paths for images and links",
"default": false
} }
} }
}, },
"extractorOptions": { "extractorOptions": {
"type": "object", "type": "object",
"description": "Options for LLM-based extraction of structured information from the page content", "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
"default": {},
"properties": { "properties": {
"mode": { "mode": {
"type": "string", "type": "string",
"enum": ["llm-extraction", "llm-extraction-from-raw-html"], "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
"description": "The extraction mode to use. llm-extraction: Extracts information from the cleaned and parsed content. llm-extraction-from-raw-html: Extracts information directly from the raw HTML." "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
}, },
"extractionPrompt": { "extractionPrompt": {
"type": "string", "type": "string",
"description": "A prompt describing what information to extract from the page" "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
}, },
"extractionSchema": { "extractionSchema": {
"type": "object", "type": "object",
"additionalProperties": true, "additionalProperties": true,
"description": "The schema for the data to be extracted", "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
"required": [ "required": [
"company_mission", "company_mission",
"supports_sso", "supports_sso",
@ -134,13 +140,52 @@
} }
}, },
"402": { "402": {
"description": "Payment required" "description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
}, },
"429": { "429": {
"description": "Too many requests" "description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
}, },
"500": { "500": {
"description": "Server error" "description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
} }
} }
} }
@ -216,7 +261,12 @@
}, },
"allowBackwardCrawling": { "allowBackwardCrawling": {
"type": "boolean", "type": "boolean",
"description": "Allow backward crawling (crawl from the base URL to the previous URLs)", "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
"default": false
},
"allowExternalContentLinks": {
"type": "boolean",
"description": "Allows the crawler to follow links to external websites.",
"default": false "default": false
} }
} }
@ -224,25 +274,32 @@
"pageOptions": { "pageOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"headers": {
"type": "object",
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
},
"includeHtml": {
"type": "boolean",
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false
},
"onlyIncludeTags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
},
"onlyMainContent": { "onlyMainContent": {
"type": "boolean", "type": "boolean",
"description": "Only return the main content of the page excluding headers, navs, footers, etc.", "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
"default": false "default": false
}, },
"includeHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a html key in the response.",
"default": false
},
"screenshot": {
"type": "boolean",
"description": "Include a screenshot of the top of the page that you are scraping.",
"default": false
},
"headers": {
"type": "object",
"description": "Headers to send with the request when scraping. Can be used to send cookies, user-agent, etc."
},
"removeTags": { "removeTags": {
"type": "array", "type": "array",
"items": { "items": {
@ -254,6 +311,16 @@
"type": "boolean", "type": "boolean",
"description": "Replace all relative paths with absolute paths for images and links", "description": "Replace all relative paths with absolute paths for images and links",
"default": false "default": false
},
"screenshot": {
"type": "boolean",
"description": "Include a screenshot of the top of the page that you are scraping.",
"default": false
},
"waitFor": {
"type": "integer",
"description": "Wait x amount of milliseconds for the page to load to fetch content",
"default": 0
} }
} }
} }
@ -275,13 +342,52 @@
} }
}, },
"402": { "402": {
"description": "Payment required" "description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
}, },
"429": { "429": {
"description": "Too many requests" "description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
}, },
"500": { "500": {
"description": "Server error" "description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
} }
} }
} }
@ -323,7 +429,12 @@
}, },
"includeHtml": { "includeHtml": {
"type": "boolean", "type": "boolean",
"description": "Include the raw HTML content of the page. Will output a html key in the response.", "description": "Include the HTML version of the content on page. Will output a html key in the response.",
"default": false
},
"includeRawHtml": {
"type": "boolean",
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
"default": false "default": false
} }
} }
@ -355,13 +466,52 @@
} }
}, },
"402": { "402": {
"description": "Payment required" "description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
}, },
"429": { "429": {
"description": "Too many requests" "description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
}, },
"500": { "500": {
"description": "Server error" "description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
} }
} }
} }
@ -403,14 +553,6 @@
"type": "integer", "type": "integer",
"description": "Current page number" "description": "Current page number"
}, },
"current_url": {
"type": "string",
"description": "Current URL being scraped"
},
"current_step": {
"type": "string",
"description": "Current step in the process"
},
"total": { "total": {
"type": "integer", "type": "integer",
"description": "Total number of pages" "description": "Total number of pages"
@ -427,7 +569,7 @@
"items": { "items": {
"$ref": "#/components/schemas/CrawlStatusResponseObj" "$ref": "#/components/schemas/CrawlStatusResponseObj"
}, },
"description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array." "description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when the new item is added to the array."
} }
} }
} }
@ -435,13 +577,52 @@
} }
}, },
"402": { "402": {
"description": "Payment required" "description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
}, },
"429": { "429": {
"description": "Too many requests" "description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
}, },
"500": { "500": {
"description": "Server error" "description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
} }
} }
} }
@ -485,13 +666,52 @@
} }
}, },
"402": { "402": {
"description": "Payment required" "description": "Payment required",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Payment required to access this resource."
}
}
}
}
}
}, },
"429": { "429": {
"description": "Too many requests" "description": "Too many requests",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "Request rate limit exceeded. Please wait and try again later."
}
}
}
}
}
}, },
"500": { "500": {
"description": "Server error" "description": "Server error",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"example": "An unexpected error occurred on the server."
}
}
}
}
}
} }
} }
} }
@ -523,7 +743,12 @@
"html": { "html": {
"type": "string", "type": "string",
"nullable": true, "nullable": true,
"description": "Raw HTML content of the page if `includeHtml` is true" "description": "HTML version of the content on page if `includeHtml` is true"
},
"rawHtml": {
"type": "string",
"nullable": true,
"description": "Raw HTML content of the page if `includeRawHtml` is true"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -583,7 +808,12 @@
"html": { "html": {
"type": "string", "type": "string",
"nullable": true, "nullable": true,
"description": "Raw HTML content of the page if `includeHtml` is true" "description": "HTML version of the content on page if `includeHtml` is true"
},
"rawHtml": {
"type": "string",
"nullable": true,
"description": "Raw HTML content of the page if `includeRawHtml` is true"
}, },
"index": { "index": {
"type": "integer", "type": "integer",

View File

@ -19,8 +19,8 @@
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest", "mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
"mongo-docker-console": "docker exec -it mongodb mongosh", "mongo-docker-console": "docker exec -it mongodb mongosh",
"run-example": "npx ts-node src/example.ts", "run-example": "npx ts-node src/example.ts",
"deploy:fly": "flyctl deploy && node postdeploy.js https://api.firecrawl.dev", "deploy:fly": "flyctl deploy",
"deploy:fly:staging": "fly deploy -c fly.staging.toml && node postdeploy.js https://staging-firecrawl-scraper-js.fly.dev" "deploy:fly:staging": "fly deploy -c fly.staging.toml"
}, },
"author": "", "author": "",
"license": "ISC", "license": "ISC",
@ -73,7 +73,7 @@
"form-data": "^4.0.0", "form-data": "^4.0.0",
"glob": "^10.4.2", "glob": "^10.4.2",
"gpt3-tokenizer": "^1.1.5", "gpt3-tokenizer": "^1.1.5",
"ioredis": "^5.3.2", "ioredis": "^5.4.1",
"joplin-turndown-plugin-gfm": "^1.0.12", "joplin-turndown-plugin-gfm": "^1.0.12",
"json-schema-to-zod": "^2.3.0", "json-schema-to-zod": "^2.3.0",
"keyword-extractor": "^0.0.28", "keyword-extractor": "^0.0.28",
@ -92,7 +92,6 @@
"promptable": "^0.0.10", "promptable": "^0.0.10",
"puppeteer": "^22.12.1", "puppeteer": "^22.12.1",
"rate-limiter-flexible": "2.4.2", "rate-limiter-flexible": "2.4.2",
"redis": "^4.6.7",
"resend": "^3.4.0", "resend": "^3.4.0",
"robots-parser": "^3.0.1", "robots-parser": "^3.0.1",
"scrapingbee": "^1.7.4", "scrapingbee": "^1.7.4",

View File

@ -90,7 +90,7 @@ importers:
specifier: ^1.1.5 specifier: ^1.1.5
version: 1.1.5 version: 1.1.5
ioredis: ioredis:
specifier: ^5.3.2 specifier: ^5.4.1
version: 5.4.1 version: 5.4.1
joplin-turndown-plugin-gfm: joplin-turndown-plugin-gfm:
specifier: ^1.0.12 specifier: ^1.0.12
@ -146,9 +146,6 @@ importers:
rate-limiter-flexible: rate-limiter-flexible:
specifier: 2.4.2 specifier: 2.4.2
version: 2.4.2 version: 2.4.2
redis:
specifier: ^4.6.7
version: 4.6.14
resend: resend:
specifier: ^3.4.0 specifier: ^3.4.0
version: 3.4.0 version: 3.4.0

View File

@ -1,11 +0,0 @@
require("dotenv").config();

// Post-deploy hook: POSTs to the admin "unpause" endpoint of the deployment
// whose base URL is given as the first CLI argument, prints the response
// body, and exits 0. Any failure while requesting or reading the body is
// logged and the process exits 1.
const unpauseUrl = `${process.argv[2]}/admin/${process.env.BULL_AUTH_KEY}/unpause`;

(async () => {
  try {
    const response = await fetch(unpauseUrl, { method: "POST" });
    const body = await response.text();
    console.log(body);
    process.exit(0);
  } catch (e) {
    console.error(e);
    process.exit(1);
  }
})();

View File

@ -116,11 +116,14 @@ export async function scrapeController(req: Request, res: Response) {
const crawlerOptions = req.body.crawlerOptions ?? {}; const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions }; const pageOptions = { ...defaultPageOptions, ...req.body.pageOptions };
const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions }; const extractorOptions = { ...defaultExtractorOptions, ...req.body.extractorOptions };
const origin = req.body.origin ?? defaultOrigin;
let timeout = req.body.timeout ?? defaultTimeout;
if (extractorOptions.mode === "llm-extraction") { if (extractorOptions.mode === "llm-extraction") {
pageOptions.onlyMainContent = true; pageOptions.onlyMainContent = true;
timeout = req.body.timeout ?? 90000;
} }
const origin = req.body.origin ?? defaultOrigin;
const timeout = req.body.timeout ?? defaultTimeout;
try { try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } = const { success: creditsCheckSuccess, message: creditsCheckMessage } =

View File

@ -3,7 +3,6 @@ import bodyParser from "body-parser";
import cors from "cors"; import cors from "cors";
import "dotenv/config"; import "dotenv/config";
import { getWebScraperQueue } from "./services/queue-service"; import { getWebScraperQueue } from "./services/queue-service";
import { redisClient } from "./services/rate-limiter";
import { v0Router } from "./routes/v0"; import { v0Router } from "./routes/v0";
import { initSDK } from "@hyperdx/node-opentelemetry"; import { initSDK } from "@hyperdx/node-opentelemetry";
import cluster from "cluster"; import cluster from "cluster";
@ -11,6 +10,8 @@ import os from "os";
import { Job } from "bull"; import { Job } from "bull";
import { sendSlackWebhook } from "./services/alerts/slack"; import { sendSlackWebhook } from "./services/alerts/slack";
import { checkAlerts } from "./services/alerts"; import { checkAlerts } from "./services/alerts";
import Redis from "ioredis";
import { redisRateLimitClient } from "./services/rate-limiter";
const { createBullBoard } = require("@bull-board/api"); const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter"); const { BullAdapter } = require("@bull-board/api/bullAdapter");
@ -34,11 +35,9 @@ if (cluster.isMaster) {
cluster.fork(); cluster.fork();
} }
}); });
} else { } else {
const app = express(); const app = express();
global.isProduction = process.env.IS_PRODUCTION === "true"; global.isProduction = process.env.IS_PRODUCTION === "true";
app.use(bodyParser.urlencoded({ extended: true })); app.use(bodyParser.urlencoded({ extended: true }));
@ -46,6 +45,7 @@ if (cluster.isMaster) {
app.use(cors()); // Add this line to enable CORS app.use(cors()); // Add this line to enable CORS
const serverAdapter = new ExpressAdapter(); const serverAdapter = new ExpressAdapter();
serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`); serverAdapter.setBasePath(`/admin/${process.env.BULL_AUTH_KEY}/queues`);
@ -73,7 +73,6 @@ if (cluster.isMaster) {
const DEFAULT_PORT = process.env.PORT ?? 3002; const DEFAULT_PORT = process.env.PORT ?? 3002;
const HOST = process.env.HOST ?? "localhost"; const HOST = process.env.HOST ?? "localhost";
redisClient.connect();
// HyperDX OpenTelemetry // HyperDX OpenTelemetry
if (process.env.ENV === "production") { if (process.env.ENV === "production") {
@ -120,60 +119,6 @@ if (cluster.isMaster) {
} }
}); });
// Admin endpoint: pauses the web scraper queue ahead of a shutdown.
// Replies { ok: true } once the pause call resolves, or 500 with the error
// message if anything in the handler throws.
app.post(`/admin/${process.env.BULL_AUTH_KEY}/shutdown`, async (_req, res) => {
  try {
    console.log("Gracefully shutting down...");
    const scraperQueue = getWebScraperQueue();
    // NOTE(review): arguments presumably map to Bull's
    // pause(isLocal, doNotWaitActive) — confirm against the queue library.
    await scraperQueue.pause(false, true);
    res.json({ ok: true });
  } catch (err) {
    console.error(err);
    return res.status(500).json({ error: err.message });
  }
});
// Admin endpoint: re-queues every job that is currently marked active and
// then resumes the web scraper queue.
// Replies { ok: true } on success, or 500 with the error message if the
// handler throws.
app.post(`/admin/${process.env.BULL_AUTH_KEY}/unpause`, async (req, res) => {
try {
const wsq = getWebScraperQueue();
// Snapshot of the jobs the queue reports as active right now.
const jobs = await wsq.getActive();
console.log("Requeueing", jobs.length, "jobs...");
if (jobs.length > 0) {
console.log(" Removing", jobs.length, "jobs...");
// For each active job: delete its lock key, take the lock, mark the job as
// failed with the message "interrupted", then remove it.
// A failure on one job is only logged, so the remaining jobs are still processed.
await Promise.all(jobs.map(async x => {
try {
await wsq.client.del(await x.lockKey());
await x.takeLock();
await x.moveToFailed({ message: "interrupted" });
await x.remove();
} catch (e) {
console.warn("Failed to remove job", x.id, e);
}
}));
console.log(" Re-adding", jobs.length, "jobs...");
// Re-add every job from the snapshot with its original data and job id
// (including jobs whose removal above was logged as failed).
await wsq.addBulk(jobs.map(x => ({
data: x.data,
opts: {
jobId: x.id,
},
})));
console.log(" Done!");
}
// NOTE(review): the `false` argument is presumably Bull's `isLocal` flag,
// i.e. a queue-wide resume — confirm against the queue library.
await getWebScraperQueue().resume(false);
res.json({ ok: true });
} catch (error) {
console.error(error);
return res.status(500).json({ error: error.message });
}
});
app.get(`/serverHealthCheck`, async (req, res) => { app.get(`/serverHealthCheck`, async (req, res) => {
try { try {
const webScraperQueue = getWebScraperQueue(); const webScraperQueue = getWebScraperQueue();
@ -268,15 +213,20 @@ if (cluster.isMaster) {
const numberOfBatches = 9; // Adjust based on your needs const numberOfBatches = 9; // Adjust based on your needs
const completedJobsPromises: Promise<Job[]>[] = []; const completedJobsPromises: Promise<Job[]>[] = [];
for (let i = 0; i < numberOfBatches; i++) { for (let i = 0; i < numberOfBatches; i++) {
completedJobsPromises.push(webScraperQueue.getJobs( completedJobsPromises.push(
webScraperQueue.getJobs(
["completed"], ["completed"],
i * batchSize, i * batchSize,
i * batchSize + batchSize, i * batchSize + batchSize,
true true
)); )
);
} }
const completedJobs: Job[] = (await Promise.all(completedJobsPromises)).flat(); const completedJobs: Job[] = (
const before24hJobs = completedJobs.filter( await Promise.all(completedJobsPromises)
).flat();
const before24hJobs =
completedJobs.filter(
(job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000 (job) => job.finishedOn < Date.now() - 24 * 60 * 60 * 1000
) || []; ) || [];
@ -288,7 +238,7 @@ if (cluster.isMaster) {
for (const job of before24hJobs) { for (const job of before24hJobs) {
try { try {
await job.remove() await job.remove();
count++; count++;
} catch (jobError) { } catch (jobError) {
console.error(`Failed to remove job with ID ${job.id}:`, jobError); console.error(`Failed to remove job with ID ${job.id}:`, jobError);
@ -306,8 +256,75 @@ if (cluster.isMaster) {
res.send({ isProduction: global.isProduction }); res.send({ isProduction: global.isProduction });
}); });
// Admin-only probe: checks that both the queue Redis (REDIS_URL) and the
// rate-limit Redis client can complete a SET / GET / DEL round trip.
// Responds 200 with { status: "healthy", details } when both succeed.
// Otherwise a "[REDIS DOWN]" Slack alert is sent and the response is 500.
app.get(
  `/admin/${process.env.BULL_AUTH_KEY}/redis-health`,
  async (req, res) => {
    // Connection opened for this request only. It is released in `finally`
    // so that repeated probes do not accumulate open connections.
    let queueRedis: Redis | undefined;
    try {
      queueRedis = new Redis(process.env.REDIS_URL);

      // Per-request key: overlapping probes cannot delete each other's key
      // (which would report a false "unhealthy"), and no application key
      // named "test" is ever overwritten or deleted.
      const testKey = `redis-health:${process.pid}:${Date.now()}:${Math.random()
        .toString(36)
        .slice(2)}`;
      const testValue = "test";

      // Test queueRedis
      let queueRedisHealth;
      try {
        await queueRedis.set(testKey, testValue);
        queueRedisHealth = await queueRedis.get(testKey);
        await queueRedis.del(testKey);
      } catch (error) {
        console.error("queueRedis health check failed:", error);
        queueRedisHealth = null;
      }

      // Test redisRateLimitClient
      let redisRateLimitHealth;
      try {
        await redisRateLimitClient.set(testKey, testValue);
        redisRateLimitHealth = await redisRateLimitClient.get(testKey);
        await redisRateLimitClient.del(testKey);
      } catch (error) {
        console.error("redisRateLimitClient health check failed:", error);
        redisRateLimitHealth = null;
      }

      // An instance is healthy only if the value read back matches what was written.
      const healthStatus = {
        queueRedis: queueRedisHealth === testValue ? "healthy" : "unhealthy",
        redisRateLimitClient:
          redisRateLimitHealth === testValue ? "healthy" : "unhealthy",
      };

      if (
        healthStatus.queueRedis === "healthy" &&
        healthStatus.redisRateLimitClient === "healthy"
      ) {
        console.log("Both Redis instances are healthy");
        return res
          .status(200)
          .json({ status: "healthy", details: healthStatus });
      } else {
        console.log("Redis instances health check:", healthStatus);
        await sendSlackWebhook(
          `[REDIS DOWN] Redis instances health check: ${JSON.stringify(
            healthStatus
          )}`,
          true
        );
        return res
          .status(500)
          .json({ status: "unhealthy", details: healthStatus });
      }
    } catch (error) {
      console.error("Redis health check failed:", error);
      // Caught values are not guaranteed to be Error instances.
      const message = error instanceof Error ? error.message : String(error);
      await sendSlackWebhook(
        `[REDIS DOWN] Redis instances health check: ${message}`,
        true
      );
      return res.status(500).json({ status: "unhealthy", message });
    } finally {
      // disconnect() closes the socket without waiting for a server reply,
      // so cleanup cannot hang when Redis itself is the thing that is down.
      queueRedis?.disconnect();
    }
  }
);
console.log(`Worker ${process.pid} started`); console.log(`Worker ${process.pid} started`);
} }

View File

@ -89,6 +89,7 @@ export class Document {
warning?: string; warning?: string;
index?: number; index?: number;
linksOnPage?: string[]; // Add this new field as a separate property
constructor(data: Partial<Document>) { constructor(data: Partial<Document>) {
if (!data.content) { if (!data.content) {
@ -102,6 +103,7 @@ export class Document {
this.markdown = data.markdown || ""; this.markdown = data.markdown || "";
this.childrenLinks = data.childrenLinks || undefined; this.childrenLinks = data.childrenLinks || undefined;
this.provider = data.provider || undefined; this.provider = data.provider || undefined;
this.linksOnPage = data.linksOnPage; // Assign linksOnPage if provided
} }
} }

View File

@ -1,3 +1,7 @@
import { scrapSingleUrl } from '../single_url';
import { PageOptions } from '../../../lib/entities';
jest.mock('../single_url', () => { jest.mock('../single_url', () => {
const originalModule = jest.requireActual('../single_url'); const originalModule = jest.requireActual('../single_url');
originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>'); originalModule.fetchHtmlContent = jest.fn().mockResolvedValue('<html><head><title>Test</title></head><body><h1>Roast</h1></body></html>');
@ -5,9 +9,6 @@ jest.mock('../single_url', () => {
return originalModule; return originalModule;
}); });
import { scrapSingleUrl } from '../single_url';
import { PageOptions } from '../../../lib/entities';
describe('scrapSingleUrl', () => { describe('scrapSingleUrl', () => {
it('should handle includeHtml option correctly', async () => { it('should handle includeHtml option correctly', async () => {
const url = 'https://roastmywebsite.ai'; const url = 'https://roastmywebsite.ai';
@ -22,3 +23,15 @@ describe('scrapSingleUrl', () => {
}, 10000); }, 10000);
}); });
// Verifies that scraping a page yields a non-empty `linksOnPage` array that
// includes the site's blog URL.
// NOTE(review): the expected '/blog' link looks like it depends on the content
// of the real mendable.ai page — confirm whether this request is mocked.
it('should return a list of links on the mendable.ai page', async () => {
  const { linksOnPage: links } = await scrapSingleUrl('https://mendable.ai', {
    includeHtml: true,
  });

  // The result must carry a populated list of links
  expect(links).toBeDefined();
  expect(Array.isArray(links)).toBe(true);
  expect(links.length).toBeGreaterThan(0);
  expect(links).toContain('https://mendable.ai/blog');
}, 10000);

View File

@ -16,6 +16,7 @@ import { scrapWithFetch } from "./scrapers/fetch";
import { scrapWithFireEngine } from "./scrapers/fireEngine"; import { scrapWithFireEngine } from "./scrapers/fireEngine";
import { scrapWithPlaywright } from "./scrapers/playwright"; import { scrapWithPlaywright } from "./scrapers/playwright";
import { scrapWithScrapingBee } from "./scrapers/scrapingBee"; import { scrapWithScrapingBee } from "./scrapers/scrapingBee";
import { extractLinks } from "./utils/utils";
dotenv.config(); dotenv.config();
@ -113,6 +114,8 @@ function getScrapingFallbackOrder(
return scrapersInOrder as (typeof baseScrapers)[number][]; return scrapersInOrder as (typeof baseScrapers)[number][];
} }
export async function scrapSingleUrl( export async function scrapSingleUrl(
urlToScrap: string, urlToScrap: string,
pageOptions: PageOptions = { pageOptions: PageOptions = {
@ -247,7 +250,6 @@ export async function scrapSingleUrl(
scraperResponse.text = customScrapedContent.html; scraperResponse.text = customScrapedContent.html;
screenshot = customScrapedContent.screenshot; screenshot = customScrapedContent.screenshot;
} }
//* TODO: add an optional to return markdown or structured/extracted content //* TODO: add an optional to return markdown or structured/extracted content
let cleanedHtml = removeUnwantedElements(scraperResponse.text, pageOptions); let cleanedHtml = removeUnwantedElements(scraperResponse.text, pageOptions);
return { return {
@ -322,6 +324,10 @@ export async function scrapSingleUrl(
const soup = cheerio.load(rawHtml); const soup = cheerio.load(rawHtml);
const metadata = extractMetadata(soup, urlToScrap); const metadata = extractMetadata(soup, urlToScrap);
let linksOnPage: string[] | undefined;
linksOnPage = extractLinks(rawHtml, urlToScrap);
let document: Document; let document: Document;
if (screenshot && screenshot.length > 0) { if (screenshot && screenshot.length > 0) {
document = { document = {
@ -333,6 +339,7 @@ export async function scrapSingleUrl(
extractorOptions.mode === "llm-extraction-from-raw-html" extractorOptions.mode === "llm-extraction-from-raw-html"
? rawHtml ? rawHtml
: undefined, : undefined,
linksOnPage,
metadata: { metadata: {
...metadata, ...metadata,
screenshot: screenshot, screenshot: screenshot,
@ -357,6 +364,7 @@ export async function scrapSingleUrl(
pageStatusCode: pageStatusCode, pageStatusCode: pageStatusCode,
pageError: pageError, pageError: pageError,
}, },
linksOnPage,
}; };
} }
@ -367,6 +375,7 @@ export async function scrapSingleUrl(
content: "", content: "",
markdown: "", markdown: "",
html: "", html: "",
linksOnPage: [],
metadata: { metadata: {
sourceURL: urlToScrap, sourceURL: urlToScrap,
pageStatusCode: pageStatusCode, pageStatusCode: pageStatusCode,

View File

@ -8,7 +8,11 @@ export const removeUnwantedElements = (
) => { ) => {
const soup = cheerio.load(html); const soup = cheerio.load(html);
if (pageOptions.onlyIncludeTags) { if (
pageOptions.onlyIncludeTags &&
pageOptions.onlyIncludeTags.length > 0 &&
pageOptions.onlyIncludeTags[0] !== ''
) {
if (typeof pageOptions.onlyIncludeTags === "string") { if (typeof pageOptions.onlyIncludeTags === "string") {
pageOptions.onlyIncludeTags = [pageOptions.onlyIncludeTags]; pageOptions.onlyIncludeTags = [pageOptions.onlyIncludeTags];
} }
@ -26,7 +30,11 @@ export const removeUnwantedElements = (
soup("script, style, iframe, noscript, meta, head").remove(); soup("script, style, iframe, noscript, meta, head").remove();
if (pageOptions.removeTags) { if (
pageOptions.removeTags &&
pageOptions.removeTags.length > 0 &&
pageOptions.removeTags[0] !== ''
) {
if (typeof pageOptions.removeTags === "string") { if (typeof pageOptions.removeTags === "string") {
pageOptions.removeTags = [pageOptions.removeTags]; pageOptions.removeTags = [pageOptions.removeTags];
} }

View File

@ -1,4 +1,6 @@
import axios from "axios"; import axios from "axios";
import * as cheerio from "cheerio";
export async function attemptScrapWithRequests( export async function attemptScrapWithRequests(
urlToScrap: string urlToScrap: string
@ -21,3 +23,35 @@ export async function attemptScrapWithRequests(
/**
 * Strips NUL characters (U+0000) from a string.
 *
 * `String.prototype.replace` with a string pattern only replaces the first
 * match, so a global regular expression is used to remove every occurrence.
 *
 * @param text - The text to sanitize.
 * @returns The input with all NUL characters removed.
 */
export function sanitizeText(text: string): string {
  return text.replace(/\u0000/g, "");
}
/**
 * Collects the link targets of every `<a href>` element in an HTML document.
 *
 * - `http://` / `https://` and `mailto:` hrefs are returned unchanged.
 * - Fragment-only hrefs (`#...`) and empty hrefs are ignored.
 * - Everything else (root-relative, path-relative, protocol-relative, query-only)
 *   is resolved against `baseUrl` with the standard URL resolution algorithm,
 *   so `page.html` on `https://x.dev/docs/index.html` becomes
 *   `https://x.dev/docs/page.html` rather than a naive string concatenation.
 * - Resolved targets that are not http(s) (e.g. `javascript:`, `tel:`) and
 *   hrefs that cannot be parsed are skipped.
 *
 * @param html - Raw HTML to scan for anchors.
 * @param baseUrl - Absolute URL of the page the HTML was fetched from.
 * @returns De-duplicated links in first-seen document order.
 * @throws TypeError if `baseUrl` is not a valid absolute URL.
 */
export function extractLinks(html: string, baseUrl: string): string[] {
  const $ = cheerio.load(html);
  // Parsed up front so an invalid base URL still fails fast for the caller.
  const base = new URL(baseUrl);
  // A Set de-duplicates while preserving insertion order.
  const links = new Set<string>();

  $("a").each((_, element) => {
    const href = $(element).attr("href")?.trim();
    if (!href || href.startsWith("#")) {
      // Missing/empty hrefs and fragment-only links are ignored.
      return;
    }

    if (
      href.startsWith("http://") ||
      href.startsWith("https://") ||
      href.startsWith("mailto:")
    ) {
      // Already absolute (or a mailto: link): keep exactly as written.
      links.add(href);
      return;
    }

    try {
      const resolved = new URL(href, base);
      // Drop non-navigable schemes such as javascript:, tel: or data:.
      if (resolved.protocol === "http:" || resolved.protocol === "https:") {
        links.add(resolved.href);
      }
    } catch {
      // Malformed href that cannot be resolved against the base; skip it.
    }
  });

  return [...links];
}

View File

@ -7,8 +7,10 @@ export function getWebScraperQueue() {
if (!webScraperQueue) { if (!webScraperQueue) {
webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, { webScraperQueue = new Queue("web-scraper", process.env.REDIS_URL, {
settings: { settings: {
lockDuration: 2 * 60 * 60 * 1000, // 2 hours in milliseconds, lockDuration: 2 * 60 * 1000, // 2 minutes in milliseconds,
lockRenewTime: 30 * 60 * 1000, // 30 minutes in milliseconds lockRenewTime: 15 * 1000, // 15 seconds in milliseconds
stalledInterval: 30 * 1000,
maxStalledCount: 10,
}, },
}); });
console.log("Web scraper queue created"); console.log("Web scraper queue created");

View File

@ -6,6 +6,7 @@ import { startWebScraperPipeline } from "../main/runWebScraper";
import { callWebhook } from "./webhook"; import { callWebhook } from "./webhook";
import { logJob } from "./logging/log_job"; import { logJob } from "./logging/log_job";
import { initSDK } from '@hyperdx/node-opentelemetry'; import { initSDK } from '@hyperdx/node-opentelemetry';
import { Job } from "bull";
if(process.env.ENV === 'production') { if(process.env.ENV === 'production') {
initSDK({ initSDK({
@ -16,9 +17,8 @@ if(process.env.ENV === 'production') {
const wsq = getWebScraperQueue(); const wsq = getWebScraperQueue();
wsq.process( async function processJob(job: Job, done) {
Math.floor(Number(process.env.NUM_WORKERS_PER_QUEUE ?? 8)), console.log("taking job", job.id);
async function (job, done) {
try { try {
job.progress({ job.progress({
current: 1, current: 1,
@ -58,9 +58,12 @@ wsq.process(
pageOptions: job.data.pageOptions, pageOptions: job.data.pageOptions,
origin: job.data.origin, origin: job.data.origin,
}); });
console.log("job done", job.id);
done(null, data); done(null, data);
} catch (error) { } catch (error) {
console.log("job errored", job.id, error);
if (await getWebScraperQueue().isPaused(false)) { if (await getWebScraperQueue().isPaused(false)) {
console.log("queue is paused, ignoring");
return; return;
} }
@ -104,5 +107,9 @@ wsq.process(
}); });
done(null, data); done(null, data);
} }
} }
wsq.process(
Math.floor(Number(process.env.NUM_WORKERS_PER_QUEUE ?? 8)),
processJob
); );

View File

@ -1,48 +1,98 @@
import { getRateLimiter, serverRateLimiter, testSuiteRateLimiter, redisClient } from "./rate-limiter"; import {
getRateLimiter,
serverRateLimiter,
testSuiteRateLimiter,
redisRateLimitClient,
} from "./rate-limiter";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../src/types";
import { RateLimiterRedis } from "rate-limiter-flexible"; import { RateLimiterRedis } from "rate-limiter-flexible";
describe("Rate Limiter Service", () => { describe("Rate Limiter Service", () => {
beforeAll(async () => { beforeAll(async () => {
await redisClient.connect(); try {
await redisRateLimitClient.connect();
// if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
// console.log("Erasing all keys");
// // erase all the keys that start with "test-prefix"
// const keys = await redisRateLimitClient.keys("test-prefix:*");
// if (keys.length > 0) {
// await redisRateLimitClient.del(...keys);
// }
// }
} catch (error) {}
}); });
afterAll(async () => { afterAll(async () => {
await redisClient.disconnect(); try {
// if (process.env.REDIS_RATE_LIMIT_URL === "redis://localhost:6379") {
await redisRateLimitClient.disconnect();
// }
} catch (error) {}
}); });
it("should return the testSuiteRateLimiter for specific tokens", () => { it("should return the testSuiteRateLimiter for specific tokens", () => {
const limiter = getRateLimiter("crawl" as RateLimiterMode, "a01ccae"); const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:a01ccae"
);
expect(limiter).toBe(testSuiteRateLimiter); expect(limiter).toBe(testSuiteRateLimiter);
const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "6254cf9"); const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:6254cf9"
);
expect(limiter2).toBe(testSuiteRateLimiter); expect(limiter2).toBe(testSuiteRateLimiter);
}); });
it("should return the serverRateLimiter if mode is not found", () => { it("should return the serverRateLimiter if mode is not found", () => {
const limiter = getRateLimiter("nonexistent" as RateLimiterMode, "someToken"); const limiter = getRateLimiter(
"nonexistent" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter).toBe(serverRateLimiter); expect(limiter).toBe(serverRateLimiter);
}); });
it("should return the correct rate limiter based on mode and plan", () => { it("should return the correct rate limiter based on mode and plan", () => {
const limiter = getRateLimiter("crawl" as RateLimiterMode, "someToken", "free"); const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(2); expect(limiter.points).toBe(2);
const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "someToken", "standard"); const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"standard"
);
expect(limiter2.points).toBe(50); expect(limiter2.points).toBe(50);
const limiter3 = getRateLimiter("search" as RateLimiterMode, "someToken", "growth"); const limiter3 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"growth"
);
expect(limiter3.points).toBe(500); expect(limiter3.points).toBe(500);
const limiter4 = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken", "growth"); const limiter4 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"growth"
);
expect(limiter4.points).toBe(150); expect(limiter4.points).toBe(150);
}); });
it("should return the default rate limiter if plan is not provided", () => { it("should return the default rate limiter if plan is not provided", () => {
const limiter = getRateLimiter("crawl" as RateLimiterMode, "someToken"); const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter.points).toBe(3); expect(limiter.points).toBe(3);
const limiter2 = getRateLimiter("scrape" as RateLimiterMode, "someToken"); const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(20); expect(limiter2.points).toBe(20);
}); });
@ -50,7 +100,7 @@ describe("Rate Limiter Service", () => {
const keyPrefix = "test-prefix"; const keyPrefix = "test-prefix";
const points = 10; const points = 10;
const limiter = new RateLimiterRedis({ const limiter = new RateLimiterRedis({
storeClient: redisClient, storeClient: redisRateLimitClient,
keyPrefix, keyPrefix,
points, points,
duration: 60, duration: 60,
@ -62,26 +112,253 @@ describe("Rate Limiter Service", () => {
}); });
it("should return the correct rate limiter for 'preview' mode", () => { it("should return the correct rate limiter for 'preview' mode", () => {
const limiter = getRateLimiter("preview" as RateLimiterMode, "someToken", "free"); const limiter = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(5); expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter("preview" as RateLimiterMode, "someToken"); const limiter2 = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(5); expect(limiter2.points).toBe(5);
}); });
it("should return the correct rate limiter for 'account' mode", () => { it("should return the correct rate limiter for 'account' mode", () => {
const limiter = getRateLimiter("account" as RateLimiterMode, "someToken", "free"); const limiter = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(100); expect(limiter.points).toBe(100);
const limiter2 = getRateLimiter("account" as RateLimiterMode, "someToken"); const limiter2 = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(100); expect(limiter2.points).toBe(100);
}); });
it("should return the correct rate limiter for 'crawlStatus' mode", () => { it("should return the correct rate limiter for 'crawlStatus' mode", () => {
const limiter = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken", "free"); const limiter = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(150); expect(limiter.points).toBe(150);
const limiter2 = getRateLimiter("crawlStatus" as RateLimiterMode, "someToken"); const limiter2 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(150); expect(limiter2.points).toBe(150);
}); });
it("should consume points correctly for 'crawl' mode", async () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someTokenCRAWL",
"free"
);
const consumePoints = 1;
const res = await limiter.consume(
"test-prefix:someTokenCRAWL",
consumePoints
);
expect(res.remainingPoints).toBe(1);
});
it("should consume points correctly for 'scrape' mode (DEFAULT)", async () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someTokenX"
);
const consumePoints = 4;
const res = await limiter.consume("test-prefix:someTokenX", consumePoints);
expect(res.remainingPoints).toBe(16);
});
// Consumes half of the hobby-plan 'scrape' allowance and checks the counters.
it("should consume points correctly for 'scrape' mode (HOBBY)", async () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someTokenXY",
"hobby"
);
// expect hobby to have 10 points
expect(limiter.points).toBe(10);
const consumePoints = 5;
const res = await limiter.consume("test-prefix:someTokenXY", consumePoints);
// 5 of the 10 points are used, leaving 5.
expect(res.consumedPoints).toBe(5);
expect(res.remainingPoints).toBe(5);
});
it("should return the correct rate limiter for 'crawl' mode", () => {
const limiter = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(2);
const limiter2 = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"starter"
);
expect(limiter2.points).toBe(3);
const limiter3 = getRateLimiter(
"crawl" as RateLimiterMode,
"test-prefix:someToken",
"standard"
);
expect(limiter3.points).toBe(5);
});
it("should return the correct rate limiter for 'scrape' mode", () => {
const limiter = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"starter"
);
expect(limiter2.points).toBe(20);
const limiter3 = getRateLimiter(
"scrape" as RateLimiterMode,
"test-prefix:someToken",
"standard"
);
expect(limiter3.points).toBe(50);
});
it("should return the correct rate limiter for 'search' mode", () => {
const limiter = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"starter"
);
expect(limiter2.points).toBe(20);
const limiter3 = getRateLimiter(
"search" as RateLimiterMode,
"test-prefix:someToken",
"standard"
);
expect(limiter3.points).toBe(40);
});
it("should return the correct rate limiter for 'preview' mode", () => {
const limiter = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(5);
const limiter2 = getRateLimiter(
"preview" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(5);
});
it("should return the correct rate limiter for 'account' mode", () => {
const limiter = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(100);
const limiter2 = getRateLimiter(
"account" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(100);
});
it("should return the correct rate limiter for 'crawlStatus' mode", () => {
const limiter = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(150);
const limiter2 = getRateLimiter(
"crawlStatus" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(150);
});
it("should return the correct rate limiter for 'testSuite' mode", () => {
const limiter = getRateLimiter(
"testSuite" as RateLimiterMode,
"test-prefix:someToken",
"free"
);
expect(limiter.points).toBe(10000);
const limiter2 = getRateLimiter(
"testSuite" as RateLimiterMode,
"test-prefix:someToken"
);
expect(limiter2.points).toBe(10000);
});
// Over-consuming must reject; previously the only assertion lived in the
// catch block, so the test passed silently when consume() resolved.
it("should throw an error when consuming more points than available", async () => {
  const limiter = getRateLimiter(
    "crawl" as RateLimiterMode,
    "test-prefix:someToken"
  );
  const consumePoints = limiter.points + 1;

  // The limiter rejects with a non-Error result object, so only assert that
  // the promise rejects with some value.
  await expect(
    limiter.consume("test-prefix:someToken", consumePoints)
  ).rejects.toBeDefined();

  // expect remaining points to be 0
  const res = await limiter.get("test-prefix:someToken");
  expect(res?.remainingPoints).toBe(0);
});
it("should reset points after duration", async () => {
const keyPrefix = "test-prefix";
const points = 10;
const duration = 1; // 1 second
const limiter = new RateLimiterRedis({
storeClient: redisRateLimitClient,
keyPrefix,
points,
duration,
});
const consumePoints = 5;
await limiter.consume("test-prefix:someToken", consumePoints);
await new Promise((resolve) => setTimeout(resolve, duration * 1000 + 100)); // Wait for duration + 100ms
const res = await limiter.consume("test-prefix:someToken", consumePoints);
expect(res.remainingPoints).toBe(points - consumePoints);
});
}); });

View File

@ -1,6 +1,6 @@
import { RateLimiterRedis } from "rate-limiter-flexible"; import { RateLimiterRedis } from "rate-limiter-flexible";
import * as redis from "redis";
import { RateLimiterMode } from "../../src/types"; import { RateLimiterMode } from "../../src/types";
import Redis from "ioredis";
const RATE_LIMITS = { const RATE_LIMITS = {
crawl: { crawl: {
@ -57,14 +57,13 @@ const RATE_LIMITS = {
}, },
}; };
export const redisClient = redis.createClient({ export const redisRateLimitClient = new Redis(
url: process.env.REDIS_URL, process.env.REDIS_RATE_LIMIT_URL
legacyMode: true, )
});
const createRateLimiter = (keyPrefix, points) => const createRateLimiter = (keyPrefix, points) =>
new RateLimiterRedis({ new RateLimiterRedis({
storeClient: redisClient, storeClient: redisRateLimitClient,
keyPrefix, keyPrefix,
points, points,
duration: 60, // Duration in seconds duration: 60, // Duration in seconds
@ -76,7 +75,7 @@ export const serverRateLimiter = createRateLimiter(
); );
export const testSuiteRateLimiter = new RateLimiterRedis({ export const testSuiteRateLimiter = new RateLimiterRedis({
storeClient: redisClient, storeClient: redisRateLimitClient,
keyPrefix: "test-suite", keyPrefix: "test-suite",
points: 10000, points: 10000,
duration: 60, // Duration in seconds duration: 60, // Duration in seconds

View File

@ -1,10 +1,8 @@
import Redis from "ioredis"; import Redis from "ioredis";
import { redisRateLimitClient } from "./rate-limiter";
// Initialize Redis client
const redis = new Redis(process.env.REDIS_URL);
// Listen to 'error' events to the Redis connection // Listen to 'error' events to the Redis connection
redis.on("error", (error) => { redisRateLimitClient.on("error", (error) => {
try { try {
if (error.message === "ECONNRESET") { if (error.message === "ECONNRESET") {
console.log("Connection to Redis Session Store timed out."); console.log("Connection to Redis Session Store timed out.");
@ -15,16 +13,16 @@ redis.on("error", (error) => {
}); });
// Listen to 'reconnecting' event to Redis // Listen to 'reconnecting' event to Redis
redis.on("reconnecting", (err) => { redisRateLimitClient.on("reconnecting", (err) => {
try { try {
if (redis.status === "reconnecting") if (redisRateLimitClient.status === "reconnecting")
console.log("Reconnecting to Redis Session Store..."); console.log("Reconnecting to Redis Session Store...");
else console.log("Error reconnecting to Redis Session Store."); else console.log("Error reconnecting to Redis Session Store.");
} catch (error) {} } catch (error) {}
}); });
// Listen to the 'connect' event to Redis // Listen to the 'connect' event to Redis
redis.on("connect", (err) => { redisRateLimitClient.on("connect", (err) => {
try { try {
if (!err) console.log("Connected to Redis Session Store!"); if (!err) console.log("Connected to Redis Session Store!");
} catch (error) {} } catch (error) {}
@ -38,9 +36,9 @@ redis.on("connect", (err) => {
*/ */
const setValue = async (key: string, value: string, expire?: number) => { const setValue = async (key: string, value: string, expire?: number) => {
if (expire) { if (expire) {
await redis.set(key, value, "EX", expire); await redisRateLimitClient.set(key, value, "EX", expire);
} else { } else {
await redis.set(key, value); await redisRateLimitClient.set(key, value);
} }
}; };
@ -50,7 +48,7 @@ const setValue = async (key: string, value: string, expire?: number) => {
* @returns {Promise<string|null>} The value, if found, otherwise null. * @returns {Promise<string|null>} The value, if found, otherwise null.
*/ */
const getValue = async (key: string): Promise<string | null> => { const getValue = async (key: string): Promise<string | null> => {
const value = await redis.get(key); const value = await redisRateLimitClient.get(key);
return value; return value;
}; };
@ -59,7 +57,7 @@ const getValue = async (key: string): Promise<string | null> => {
* @param {string} key The key to delete. * @param {string} key The key to delete.
*/ */
const deleteKey = async (key: string) => { const deleteKey = async (key: string) => {
await redis.del(key); await redisRateLimitClient.del(key);
}; };
export { setValue, getValue, deleteKey }; export { setValue, getValue, deleteKey };

View File

@ -1,9 +0,0 @@
// One-shot script: POSTs to `<base>/admin/<BULL_AUTH_KEY>/shutdown`, where
// <base> is the first CLI argument and the key is read from the environment.
// NOTE(review): the auth key is sent as part of the URL path.
fetch(process.argv[2] + "/admin/" + process.env.BULL_AUTH_KEY + "/shutdown", {
method: "POST"
}).then(async x => {
// Print the response body and exit successfully. The HTTP status is not
// inspected here, so any response that arrives leads to exit code 0.
console.log(await x.text());
process.exit(0);
}).catch(e => {
// The request (or reading its body) failed: log the error and exit with 1.
console.error(e);
process.exit(1);
});

View File

@ -157,8 +157,14 @@ export default class FirecrawlApp {
return { return {
success: true, success: true,
status: response.data.status, status: response.data.status,
current: response.data.current,
current_url: response.data.current_url,
current_step: response.data.current_step,
total: response.data.total,
data: response.data.data, data: response.data.data,
partial_data: !response.data.data ? response.data.partial_data : undefined, partial_data: !response.data.data
? response.data.partial_data
: undefined,
}; };
} }
else { else {
@ -171,6 +177,10 @@ export default class FirecrawlApp {
return { return {
success: false, success: false,
status: "unknown", status: "unknown",
current: 0,
current_url: "",
current_step: "",
total: 0,
error: "Internal server error.", error: "Internal server error.",
}; };
}); });
@ -180,7 +190,7 @@ export default class FirecrawlApp {
* @returns {AxiosRequestHeaders} The prepared headers. * @returns {AxiosRequestHeaders} The prepared headers.
*/ */
prepareHeaders(idempotencyKey) { prepareHeaders(idempotencyKey) {
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {})); return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
} }
/** /**
* Sends a POST request to the specified URL. * Sends a POST request to the specified URL.

View File

@ -1,5 +0,0 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
};

View File

@ -0,0 +1,16 @@
// Jest configuration that runs TypeScript sources through ts-jest in ESM mode.
/** @type {import('ts-jest').JestConfigWithTsJest} **/
export default {
testEnvironment: "node",
// Rewrite relative imports that end in ".js" (e.g. "./foo.js") to the same
// path without the extension ("./foo").
"moduleNameMapper": {
"^(\\.{1,2}/.*)\\.js$": "$1",
},
// Treat .ts files as ES modules.
"extensionsToTreatAsEsm": [".ts"],
// Send .mts, .ts, .cjs and .js files through ts-jest with ESM output enabled.
"transform": {
"^.+\\.(mt|t|cj|j)s$": [
"ts-jest",
{
"useESM": true
}
]
},
};

View File

@ -1,12 +1,12 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.28", "version": "0.0.29",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.28", "version": "0.0.29",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"axios": "^1.6.8", "axios": "^1.6.8",
@ -24,7 +24,7 @@
"@types/node": "^20.12.12", "@types/node": "^20.12.12",
"@types/uuid": "^9.0.8", "@types/uuid": "^9.0.8",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.1.2", "ts-jest": "^29.2.2",
"typescript": "^5.4.5" "typescript": "^5.4.5"
} }
}, },
@ -42,12 +42,12 @@
} }
}, },
"node_modules/@babel/code-frame": { "node_modules/@babel/code-frame": {
"version": "7.24.2", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.2.tgz", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.7.tgz",
"integrity": "sha512-y5+tLQyV8pg3fsiln67BVLD1P13Eg4lh5RW9mF0zUuvLrv9uIQ4MCL+CRT+FTsBlBjcIan6PGsLcBN0m3ClUyQ==", "integrity": "sha512-BcYH1CVJBO9tvyIZ2jVeXgSIMvGZ2FDRvDdOIVQyuklNKSsx+eppDEBq/g47Ayw+RqNFE+URvOShmf+f/qwAlA==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/highlight": "^7.24.2", "@babel/highlight": "^7.24.7",
"picocolors": "^1.0.0" "picocolors": "^1.0.0"
}, },
"engines": { "engines": {
@ -55,9 +55,9 @@
} }
}, },
"node_modules/@babel/compat-data": { "node_modules/@babel/compat-data": {
"version": "7.24.4", "version": "7.24.9",
"resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.4.tgz", "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.9.tgz",
"integrity": "sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ==", "integrity": "sha512-e701mcfApCJqMMueQI0Fb68Amflj83+dvAvHawoBpAz+GDjCIyGHzNwnefjsWJ3xiYAqqiQFoWbspGYBdb2/ng==",
"dev": true, "dev": true,
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
@ -94,12 +94,12 @@
} }
}, },
"node_modules/@babel/generator": { "node_modules/@babel/generator": {
"version": "7.24.4", "version": "7.24.10",
"resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.24.4.tgz", "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.24.10.tgz",
"integrity": "sha512-Xd6+v6SnjWVx/nus+y0l1sxMOTOMBkyL4+BIdbALyatQnAe/SRVjANeDPSCYaX+i1iJmuGSKf3Z+E+V/va1Hvw==", "integrity": "sha512-o9HBZL1G2129luEUlG1hB4N/nlYNWHnpwlND9eOMclRqqu1YDy2sSYVCFUZwl8I1Gxh+QSRrP2vD7EpUmFVXxg==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/types": "^7.24.0", "@babel/types": "^7.24.9",
"@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/gen-mapping": "^0.3.5",
"@jridgewell/trace-mapping": "^0.3.25", "@jridgewell/trace-mapping": "^0.3.25",
"jsesc": "^2.5.1" "jsesc": "^2.5.1"
@ -109,14 +109,14 @@
} }
}, },
"node_modules/@babel/helper-compilation-targets": { "node_modules/@babel/helper-compilation-targets": {
"version": "7.23.6", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.23.6.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.24.8.tgz",
"integrity": "sha512-9JB548GZoQVmzrFgp8o7KxdgkTGm6xs9DW0o/Pim72UDjzr5ObUQ6ZzYPqA+g9OTS2bBQoctLJrky0RDCAWRgQ==", "integrity": "sha512-oU+UoqCHdp+nWVDkpldqIQL/i/bvAv53tRqLG/s+cOXxe66zOYLU7ar/Xs3LdmBihrUMEUhwu6dMZwbNOYDwvw==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/compat-data": "^7.23.5", "@babel/compat-data": "^7.24.8",
"@babel/helper-validator-option": "^7.23.5", "@babel/helper-validator-option": "^7.24.8",
"browserslist": "^4.22.2", "browserslist": "^4.23.1",
"lru-cache": "^5.1.1", "lru-cache": "^5.1.1",
"semver": "^6.3.1" "semver": "^6.3.1"
}, },
@ -125,62 +125,66 @@
} }
}, },
"node_modules/@babel/helper-environment-visitor": { "node_modules/@babel/helper-environment-visitor": {
"version": "7.22.20", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.24.7.tgz",
"integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", "integrity": "sha512-DoiN84+4Gnd0ncbBOM9AZENV4a5ZiL39HYMyZJGZ/AZEykHYdJw0wW3kdcsh9/Kn+BRXHLkkklZ51ecPKmI1CQ==",
"dev": true, "dev": true,
"dependencies": {
"@babel/types": "^7.24.7"
},
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-function-name": { "node_modules/@babel/helper-function-name": {
"version": "7.23.0", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.24.7.tgz",
"integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", "integrity": "sha512-FyoJTsj/PEUWu1/TYRiXTIHc8lbw+TDYkZuoE43opPS5TrI7MyONBE1oNvfguEXAD9yhQRrVBnXdXzSLQl9XnA==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/template": "^7.22.15", "@babel/template": "^7.24.7",
"@babel/types": "^7.23.0" "@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-hoist-variables": { "node_modules/@babel/helper-hoist-variables": {
"version": "7.22.5", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.24.7.tgz",
"integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", "integrity": "sha512-MJJwhkoGy5c4ehfoRyrJ/owKeMl19U54h27YYftT0o2teQ3FJ3nQUf/I3LlJsX4l3qlw7WRXUmiyajvHXoTubQ==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/types": "^7.22.5" "@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-module-imports": { "node_modules/@babel/helper-module-imports": {
"version": "7.24.3", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.24.3.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.24.7.tgz",
"integrity": "sha512-viKb0F9f2s0BCS22QSF308z/+1YWKV/76mwt61NBzS5izMzDPwdq1pTrzf+Li3npBWX9KdQbkeCt1jSAM7lZqg==", "integrity": "sha512-8AyH3C+74cgCVVXow/myrynrAGv+nTVg5vKu2nZph9x7RcRwzmh0VFallJuFTZ9mx6u4eSdXZfcOzSqTUm0HCA==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/types": "^7.24.0" "@babel/traverse": "^7.24.7",
"@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-module-transforms": { "node_modules/@babel/helper-module-transforms": {
"version": "7.23.3", "version": "7.24.9",
"resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.3.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.24.9.tgz",
"integrity": "sha512-7bBs4ED9OmswdfDzpz4MpWgSrV7FXlc3zIagvLFjS5H+Mk7Snr21vQ6QwrsoCGMfNC4e4LQPdoULEt4ykz0SRQ==", "integrity": "sha512-oYbh+rtFKj/HwBQkFlUzvcybzklmVdVV3UU+mN7n2t/q3yGHbuVdNxyFvSBO1tfvjyArpHNcWMAzsSPdyI46hw==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/helper-environment-visitor": "^7.22.20", "@babel/helper-environment-visitor": "^7.24.7",
"@babel/helper-module-imports": "^7.22.15", "@babel/helper-module-imports": "^7.24.7",
"@babel/helper-simple-access": "^7.22.5", "@babel/helper-simple-access": "^7.24.7",
"@babel/helper-split-export-declaration": "^7.22.6", "@babel/helper-split-export-declaration": "^7.24.7",
"@babel/helper-validator-identifier": "^7.22.20" "@babel/helper-validator-identifier": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
@ -190,60 +194,61 @@
} }
}, },
"node_modules/@babel/helper-plugin-utils": { "node_modules/@babel/helper-plugin-utils": {
"version": "7.24.0", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.0.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.8.tgz",
"integrity": "sha512-9cUznXMG0+FxRuJfvL82QlTqIzhVW9sL0KjMPHhAOOvpQGL8QtdxnBKILjBqxlHyliz0yCa1G903ZXI/FuHy2w==", "integrity": "sha512-FFWx5142D8h2Mgr/iPVGH5G7w6jDn4jUSpZTyDnQO0Yn7Ks2Kuz6Pci8H6MPCoUJegd/UZQ3tAvfLCxQSnWWwg==",
"dev": true, "dev": true,
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-simple-access": { "node_modules/@babel/helper-simple-access": {
"version": "7.22.5", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.22.5.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.24.7.tgz",
"integrity": "sha512-n0H99E/K+Bika3++WNL17POvo4rKWZ7lZEp1Q+fStVbUi8nxPQEBOlTmCOxW/0JsS56SKKQ+ojAe2pHKJHN35w==", "integrity": "sha512-zBAIvbCMh5Ts+b86r/CjU+4XGYIs+R1j951gxI3KmmxBMhCg4oQMsv6ZXQ64XOm/cvzfU1FmoCyt6+owc5QMYg==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/types": "^7.22.5" "@babel/traverse": "^7.24.7",
"@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-split-export-declaration": { "node_modules/@babel/helper-split-export-declaration": {
"version": "7.22.6", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.24.7.tgz",
"integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", "integrity": "sha512-oy5V7pD+UvfkEATUKvIjvIAH/xCzfsFVw7ygW2SI6NClZzquT+mwdTfgfdbUiceh6iQO0CHtCPsyze/MZ2YbAA==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/types": "^7.22.5" "@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-string-parser": { "node_modules/@babel/helper-string-parser": {
"version": "7.24.1", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.8.tgz",
"integrity": "sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==", "integrity": "sha512-pO9KhhRcuUyGnJWwyEgnRJTSIZHiT+vMD0kPeD+so0l7mxkMT19g3pjY9GTnHySck/hDzq+dtW/4VgnMkippsQ==",
"dev": true, "dev": true,
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-validator-identifier": { "node_modules/@babel/helper-validator-identifier": {
"version": "7.22.20", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.7.tgz",
"integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", "integrity": "sha512-rR+PBcQ1SMQDDyF6X0wxtG8QyLCgUB0eRAGguqRLfkCA87l7yAP7ehq8SNj96OOGTO8OBV70KhuFYcIkHXOg0w==",
"dev": true, "dev": true,
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/helper-validator-option": { "node_modules/@babel/helper-validator-option": {
"version": "7.23.5", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.23.5.tgz", "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.24.8.tgz",
"integrity": "sha512-85ttAOMLsr53VgXkTbkx8oA6YTfT4q7/HzXSLEYmjcSTJPMPQtvq1BD79Byep5xMUYbGRzEpDsjUf3dyp54IKw==", "integrity": "sha512-xb8t9tD1MHLungh/AIoWYN+gVHaB9kwlu8gffXGSt3FFEIT7RjS+xWbc2vUD1UTZdIpKj/ab3rdqJ7ufngyi2Q==",
"dev": true, "dev": true,
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
@ -264,12 +269,12 @@
} }
}, },
"node_modules/@babel/highlight": { "node_modules/@babel/highlight": {
"version": "7.24.2", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.2.tgz", "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.7.tgz",
"integrity": "sha512-Yac1ao4flkTxTteCDZLEvdxg2fZfz1v8M4QpaGypq/WPDqg3ijHYbDfs+LG5hvzSoqaSZ9/Z9lKSP3CjZjv+pA==", "integrity": "sha512-EStJpq4OuY8xYfhGVXngigBJRWxftKX9ksiGDnmlY3o7B/V7KIAc9X4oiK87uPJSc/vs5L869bem5fhZa8caZw==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/helper-validator-identifier": "^7.22.20", "@babel/helper-validator-identifier": "^7.24.7",
"chalk": "^2.4.2", "chalk": "^2.4.2",
"js-tokens": "^4.0.0", "js-tokens": "^4.0.0",
"picocolors": "^1.0.0" "picocolors": "^1.0.0"
@ -350,9 +355,9 @@
} }
}, },
"node_modules/@babel/parser": { "node_modules/@babel/parser": {
"version": "7.24.4", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.4.tgz", "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.8.tgz",
"integrity": "sha512-zTvEBcghmeBma9QIGunWevvBAp4/Qu9Bdq+2k0Ot4fVMD6v3dsC9WOcRSKk7tRRyBM/53yKMJko9xOatGQAwSg==", "integrity": "sha512-WzfbgXOkGzZiXXCqk43kKwZjzwx4oulxZi3nq2TYL9mOjQv6kYwul9mz6ID36njuL7Xkp6nJEfok848Zj10j/w==",
"dev": true, "dev": true,
"bin": { "bin": {
"parser": "bin/babel-parser.js" "parser": "bin/babel-parser.js"
@ -539,33 +544,33 @@
} }
}, },
"node_modules/@babel/template": { "node_modules/@babel/template": {
"version": "7.24.0", "version": "7.24.7",
"resolved": "https://registry.npmjs.org/@babel/template/-/template-7.24.0.tgz", "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.24.7.tgz",
"integrity": "sha512-Bkf2q8lMB0AFpX0NFEqSbx1OkTHf0f+0j82mkw+ZpzBnkk7e9Ql0891vlfgi+kHwOk8tQjiQHpqh4LaSa0fKEA==", "integrity": "sha512-jYqfPrU9JTF0PmPy1tLYHW4Mp4KlgxJD9l2nP9fD6yT/ICi554DmrWBAEYpIelzjHf1msDP3PxJIRt/nFNfBig==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/code-frame": "^7.23.5", "@babel/code-frame": "^7.24.7",
"@babel/parser": "^7.24.0", "@babel/parser": "^7.24.7",
"@babel/types": "^7.24.0" "@babel/types": "^7.24.7"
}, },
"engines": { "engines": {
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@babel/traverse": { "node_modules/@babel/traverse": {
"version": "7.24.1", "version": "7.24.8",
"resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.24.1.tgz", "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.24.8.tgz",
"integrity": "sha512-xuU6o9m68KeqZbQuDt2TcKSxUw/mrsvavlEqQ1leZ/B+C9tk6E4sRWy97WaXgvq5E+nU3cXMxv3WKOCanVMCmQ==", "integrity": "sha512-t0P1xxAPzEDcEPmjprAQq19NWum4K0EQPjMwZQZbHt+GiZqvjCHjj755Weq1YRPVzBI+3zSfvScfpnuIecVFJQ==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/code-frame": "^7.24.1", "@babel/code-frame": "^7.24.7",
"@babel/generator": "^7.24.1", "@babel/generator": "^7.24.8",
"@babel/helper-environment-visitor": "^7.22.20", "@babel/helper-environment-visitor": "^7.24.7",
"@babel/helper-function-name": "^7.23.0", "@babel/helper-function-name": "^7.24.7",
"@babel/helper-hoist-variables": "^7.22.5", "@babel/helper-hoist-variables": "^7.24.7",
"@babel/helper-split-export-declaration": "^7.22.6", "@babel/helper-split-export-declaration": "^7.24.7",
"@babel/parser": "^7.24.1", "@babel/parser": "^7.24.8",
"@babel/types": "^7.24.0", "@babel/types": "^7.24.8",
"debug": "^4.3.1", "debug": "^4.3.1",
"globals": "^11.1.0" "globals": "^11.1.0"
}, },
@ -574,13 +579,13 @@
} }
}, },
"node_modules/@babel/types": { "node_modules/@babel/types": {
"version": "7.24.0", "version": "7.24.9",
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.0.tgz", "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.9.tgz",
"integrity": "sha512-+j7a5c253RfKh8iABBhywc8NSfP5LURe7Uh4qpsh6jc+aLJguvmIUBdjSdEMQv2bENrCR5MfRdjGo7vzS/ob7w==", "integrity": "sha512-xm8XrMKz0IlUdocVbYJe0Z9xEgidU7msskG8BbhnTPK/HZ2z/7FP7ykqPgrUH+C+r414mNfNWam1f2vqOjqjYQ==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"@babel/helper-string-parser": "^7.23.4", "@babel/helper-string-parser": "^7.24.8",
"@babel/helper-validator-identifier": "^7.22.20", "@babel/helper-validator-identifier": "^7.24.7",
"to-fast-properties": "^2.0.0" "to-fast-properties": "^2.0.0"
}, },
"engines": { "engines": {
@ -1175,6 +1180,12 @@
"sprintf-js": "~1.0.2" "sprintf-js": "~1.0.2"
} }
}, },
"node_modules/async": {
"version": "3.2.5",
"resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
"integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg==",
"dev": true
},
"node_modules/asynckit": { "node_modules/asynckit": {
"version": "0.4.0", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
@ -1326,9 +1337,9 @@
} }
}, },
"node_modules/browserslist": { "node_modules/browserslist": {
"version": "4.23.0", "version": "4.23.2",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.0.tgz", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.2.tgz",
"integrity": "sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ==", "integrity": "sha512-qkqSyistMYdxAcw+CzbZwlBy8AGmS/eEWs+sEV5TnLRGDOL+C5M2EnH6tlZyg0YoAxGJAFKh61En9BR941GnHA==",
"dev": true, "dev": true,
"funding": [ "funding": [
{ {
@ -1345,10 +1356,10 @@
} }
], ],
"dependencies": { "dependencies": {
"caniuse-lite": "^1.0.30001587", "caniuse-lite": "^1.0.30001640",
"electron-to-chromium": "^1.4.668", "electron-to-chromium": "^1.4.820",
"node-releases": "^2.0.14", "node-releases": "^2.0.14",
"update-browserslist-db": "^1.0.13" "update-browserslist-db": "^1.1.0"
}, },
"bin": { "bin": {
"browserslist": "cli.js" "browserslist": "cli.js"
@ -1403,9 +1414,9 @@
} }
}, },
"node_modules/caniuse-lite": { "node_modules/caniuse-lite": {
"version": "1.0.30001612", "version": "1.0.30001642",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001612.tgz", "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001642.tgz",
"integrity": "sha512-lFgnZ07UhaCcsSZgWW0K5j4e69dK1u/ltrL9lTUiFOwNHs12S3UMIEYgBV0Z6C6hRDev7iRnMzzYmKabYdXF9g==", "integrity": "sha512-3XQ0DoRgLijXJErLSl+bLnJ+Et4KqV1PY6JJBGAFlsNsz31zeAIncyeZfLCabHK/jtSh+671RM9YMldxjUPZtA==",
"dev": true, "dev": true,
"funding": [ "funding": [
{ {
@ -1651,10 +1662,25 @@
"url": "https://dotenvx.com" "url": "https://dotenvx.com"
} }
}, },
"node_modules/ejs": {
"version": "3.1.10",
"resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.10.tgz",
"integrity": "sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==",
"dev": true,
"dependencies": {
"jake": "^10.8.5"
},
"bin": {
"ejs": "bin/cli.js"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/electron-to-chromium": { "node_modules/electron-to-chromium": {
"version": "1.4.748", "version": "1.4.829",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.829.tgz",
"integrity": "sha512-VWqjOlPZn70UZ8FTKUOkUvBLeTQ0xpty66qV0yJcAGY2/CthI4xyW9aEozRVtuwv3Kpf5xTesmJUcPwuJmgP4A==", "integrity": "sha512-5qp1N2POAfW0u1qGAxXEtz6P7bO1m6gpZr5hdf5ve6lxpLM7MpiM4jIPz7xcrNlClQMafbyUDDWjlIQZ1Mw0Rw==",
"dev": true "dev": true
}, },
"node_modules/emittery": { "node_modules/emittery": {
@ -1778,6 +1804,36 @@
"bser": "2.1.1" "bser": "2.1.1"
} }
}, },
"node_modules/filelist": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.4.tgz",
"integrity": "sha512-w1cEuf3S+DrLCQL7ET6kz+gmlJdbq9J7yXCSjK/OZCPA+qEN1WyF4ZAf0YYJa4/shHJra2t/d/r8SV4Ji+x+8Q==",
"dev": true,
"dependencies": {
"minimatch": "^5.0.1"
}
},
"node_modules/filelist/node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dev": true,
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/filelist/node_modules/minimatch": {
"version": "5.1.6",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
"dev": true,
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=10"
}
},
"node_modules/fill-range": { "node_modules/fill-range": {
"version": "7.0.1", "version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -2180,6 +2236,24 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/jake": {
"version": "10.9.1",
"resolved": "https://registry.npmjs.org/jake/-/jake-10.9.1.tgz",
"integrity": "sha512-61btcOHNnLnsOdtLgA5efqQWjnSi/vow5HbI7HMdKKWqvrKR1bLK3BPlJn9gcSaP2ewuamUSMB5XEy76KUIS2w==",
"dev": true,
"dependencies": {
"async": "^3.2.3",
"chalk": "^4.0.2",
"filelist": "^1.0.4",
"minimatch": "^3.1.2"
},
"bin": {
"jake": "bin/cli.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/jest": { "node_modules/jest": {
"version": "29.7.0", "version": "29.7.0",
"resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz",
@ -3009,9 +3083,9 @@
"dev": true "dev": true
}, },
"node_modules/node-releases": { "node_modules/node-releases": {
"version": "2.0.14", "version": "2.0.17",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.17.tgz",
"integrity": "sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==", "integrity": "sha512-Ww6ZlOiEQfPfXM45v17oabk77Z7mg5bOt7AjDyzy7RjK9OrLrLC8dyZQoAPEOtFX9SaNf1Tdvr5gRJWdTJj7GA==",
"dev": true "dev": true
}, },
"node_modules/normalize-path": { "node_modules/normalize-path": {
@ -3162,9 +3236,9 @@
"dev": true "dev": true
}, },
"node_modules/picocolors": { "node_modules/picocolors": {
"version": "1.0.0", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz",
"integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", "integrity": "sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==",
"dev": true "dev": true
}, },
"node_modules/picomatch": { "node_modules/picomatch": {
@ -3545,12 +3619,13 @@
} }
}, },
"node_modules/ts-jest": { "node_modules/ts-jest": {
"version": "29.1.2", "version": "29.2.2",
"resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.1.2.tgz", "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-29.2.2.tgz",
"integrity": "sha512-br6GJoH/WUX4pu7FbZXuWGKGNDuU7b8Uj77g/Sp7puZV6EXzuByl6JrECvm0MzVzSTkSHWTihsXt+5XYER5b+g==", "integrity": "sha512-sSW7OooaKT34AAngP6k1VS669a0HdLxkQZnlC7T76sckGCokXFnvJ3yRlQZGRTAoV5K19HfSgCiSwWOSIfcYlg==",
"dev": true, "dev": true,
"dependencies": { "dependencies": {
"bs-logger": "0.x", "bs-logger": "0.x",
"ejs": "^3.0.0",
"fast-json-stable-stringify": "2.x", "fast-json-stable-stringify": "2.x",
"jest-util": "^29.0.0", "jest-util": "^29.0.0",
"json5": "^2.2.3", "json5": "^2.2.3",
@ -3563,10 +3638,11 @@
"ts-jest": "cli.js" "ts-jest": "cli.js"
}, },
"engines": { "engines": {
"node": "^16.10.0 || ^18.0.0 || >=20.0.0" "node": "^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0"
}, },
"peerDependencies": { "peerDependencies": {
"@babel/core": ">=7.0.0-beta.0 <8", "@babel/core": ">=7.0.0-beta.0 <8",
"@jest/transform": "^29.0.0",
"@jest/types": "^29.0.0", "@jest/types": "^29.0.0",
"babel-jest": "^29.0.0", "babel-jest": "^29.0.0",
"jest": "^29.0.0", "jest": "^29.0.0",
@ -3576,6 +3652,9 @@
"@babel/core": { "@babel/core": {
"optional": true "optional": true
}, },
"@jest/transform": {
"optional": true
},
"@jest/types": { "@jest/types": {
"optional": true "optional": true
}, },
@ -3661,9 +3740,9 @@
"dev": true "dev": true
}, },
"node_modules/update-browserslist-db": { "node_modules/update-browserslist-db": {
"version": "1.0.13", "version": "1.1.0",
"resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz",
"integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", "integrity": "sha512-EdRAaAyk2cUE1wOf2DkEhzxqOQvFOoRJFNS6NeyJ01Gp2beMRpBAINjM2iDXE3KCuKhwnvHIQCJm6ThL2Z+HzQ==",
"dev": true, "dev": true,
"funding": [ "funding": [
{ {
@ -3680,8 +3759,8 @@
} }
], ],
"dependencies": { "dependencies": {
"escalade": "^3.1.1", "escalade": "^3.1.2",
"picocolors": "^1.0.0" "picocolors": "^1.0.1"
}, },
"bin": { "bin": {
"update-browserslist-db": "cli.js" "update-browserslist-db": "cli.js"

View File

@ -1,6 +1,6 @@
{ {
"name": "@mendable/firecrawl-js", "name": "@mendable/firecrawl-js",
"version": "0.0.28", "version": "0.0.29",
"description": "JavaScript SDK for Firecrawl API", "description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js", "main": "build/index.js",
"types": "types/index.d.ts", "types": "types/index.d.ts",
@ -9,7 +9,7 @@
"build": "tsc", "build": "tsc",
"build-and-publish": "npm run build && npm publish --access public", "build-and-publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta", "publish-beta": "npm run build && npm publish --access public --tag beta",
"test": "jest src/__tests__/**/*.test.ts" "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@ -37,7 +37,7 @@
"@types/node": "^20.12.12", "@types/node": "^20.12.12",
"@types/uuid": "^9.0.8", "@types/uuid": "^9.0.8",
"jest": "^29.7.0", "jest": "^29.7.0",
"ts-jest": "^29.1.2", "ts-jest": "^29.2.2",
"typescript": "^5.4.5" "typescript": "^5.4.5"
}, },
"keywords": [ "keywords": [

View File

@ -1,7 +1,7 @@
import FirecrawlApp from '../../index'; import FirecrawlApp from '../../index';
import { v4 as uuidv4 } from 'uuid'; import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv'; import dotenv from 'dotenv';
import { describe, test, expect } from '@jest/globals';
dotenv.config(); dotenv.config();
@ -9,7 +9,7 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = "http://127.0.0.1:3002"; const API_URL = "http://127.0.0.1:3002";
describe('FirecrawlApp E2E Tests', () => { describe('FirecrawlApp E2E Tests', () => {
test.concurrent('should throw error for no API key', () => { test.concurrent('should throw error for no API key', async () => {
expect(() => { expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided"); }).toThrow("No API key provided");
@ -107,12 +107,16 @@ describe('FirecrawlApp E2E Tests', () => {
while (statusResponse.status === 'active' && checks < maxChecks) { while (statusResponse.status === 'active' && checks < maxChecks) {
await new Promise(resolve => setTimeout(resolve, 1000)); await new Promise(resolve => setTimeout(resolve, 1000));
expect(statusResponse.partial_data).not.toBeNull(); expect(statusResponse.partial_data).not.toBeNull();
expect(statusResponse.current).toBeGreaterThanOrEqual(1);
statusResponse = await app.checkCrawlStatus(response.jobId); statusResponse = await app.checkCrawlStatus(response.jobId);
checks++; checks++;
} }
expect(statusResponse).not.toBeNull(); expect(statusResponse).not.toBeNull();
expect(statusResponse.success).toBe(true);
expect(statusResponse.status).toBe('completed'); expect(statusResponse.status).toBe('completed');
expect(statusResponse.total).toEqual(statusResponse.current);
expect(statusResponse.current_step).not.toBeNull();
expect(statusResponse?.data?.length).toBeGreaterThan(0); expect(statusResponse?.data?.length).toBeGreaterThan(0);
}, 35000); // 35 seconds timeout }, 35000); // 35 seconds timeout

View File

@ -100,6 +100,10 @@ export interface CrawlResponse {
export interface JobStatusResponse { export interface JobStatusResponse {
success: boolean; success: boolean;
status: string; status: string;
current?: number;
current_url?: string;
current_step?: string;
total?: number;
jobId?: string; jobId?: string;
data?: FirecrawlDocument[]; data?: FirecrawlDocument[];
partial_data?: FirecrawlDocument[]; partial_data?: FirecrawlDocument[];
@ -287,6 +291,10 @@ export default class FirecrawlApp {
return { return {
success: true, success: true,
status: response.data.status, status: response.data.status,
current: response.data.current,
current_url: response.data.current_url,
current_step: response.data.current_step,
total: response.data.total,
data: response.data.data, data: response.data.data,
partial_data: !response.data.data partial_data: !response.data.data
? response.data.partial_data ? response.data.partial_data
@ -301,6 +309,10 @@ export default class FirecrawlApp {
return { return {
success: false, success: false,
status: "unknown", status: "unknown",
current: 0,
current_url: "",
current_step: "",
total: 0,
error: "Internal server error.", error: "Internal server error.",
}; };
} }

View File

@ -7,6 +7,7 @@ x-common-service: &common-service
- backend - backend
environment: environment:
- REDIS_URL=${REDIS_URL:-redis://redis:6379} - REDIS_URL=${REDIS_URL:-redis://redis:6379}
- REDIS_RATE_LIMIT_URL=${REDIS_URL:-redis://redis:6379}
- PLAYWRIGHT_MICROSERVICE_URL=${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000} - PLAYWRIGHT_MICROSERVICE_URL=${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000}
- USE_DB_AUTHENTICATION=${USE_DB_AUTHENTICATION} - USE_DB_AUTHENTICATION=${USE_DB_AUTHENTICATION}
- PORT=${PORT:-3002} - PORT=${PORT:-3002}