Merge branch 'v1-webscraper' of https://github.com/mendableai/firecrawl into v1-webscraper

Nicolas 2024-08-20 14:41:05 -03:00
commit 674adee144
2 changed files with 10 additions and 9 deletions

View File

@@ -33,6 +33,8 @@ const url = z.preprocess(
   )
 );
 
+const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";
+
 export const scrapeOptions = z.object({
   formats: z
     .enum([
@@ -53,14 +55,14 @@ export const scrapeOptions = z.object({
   timeout: z.number().int().positive().finite().safe().default(30000), // default?
   waitFor: z.number().int().nonnegative().finite().safe().default(0),
   parsePDF: z.boolean().default(true),
-});
+}).strict(strictMessage);
 
 export type ScrapeOptions = z.infer<typeof scrapeOptions>;
 
 export const scrapeRequestSchema = scrapeOptions.extend({
   url,
   origin: z.string().optional().default("api"),
-});
+}).strict(strictMessage);
 
 // export type ScrapeRequest = {
 //   url: string;
@@ -83,7 +85,7 @@ const crawlerOptions = z.object({
   allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
   allowExternalLinks: z.boolean().default(false),
   ignoreSitemap: z.boolean().default(true),
-});
+}).strict(strictMessage);
 
 // export type CrawlerOptions = {
 //   includePaths?: string[];
@@ -97,14 +99,13 @@ const crawlerOptions = z.object({
 
 export type CrawlerOptions = z.infer<typeof crawlerOptions>;
 
-export const crawlRequestSchema = z.object({
+export const crawlRequestSchema = crawlerOptions.extend({
   url,
   origin: z.string().optional().default("api"),
-  crawlerOptions: crawlerOptions.default({}), // TODO: Get rid of this
   scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
   webhook: z.string().url().optional(),
-  limit: z.number().default(10000),
-});
+  // limit: z.number().default(10000),
+}).strict(strictMessage);
 
 // export type CrawlRequest = {
 //   url: string;
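
Note: switching crawlRequestSchema from z.object({...}) to crawlerOptions.extend({...}) flattens the crawl request body — crawler settings that previously nested under a crawlerOptions key now sit at the top level, and the limit field is commented out here presumably because crawlerOptions already defines it. A minimal sketch of the shape change, with illustrative values:

    // Before: crawler settings nested under their own key
    const bodyBefore = {
      url: "https://example.com",
      crawlerOptions: { allowExternalLinks: true, limit: 100 },
      scrapeOptions: {},
    };

    // After: crawler settings are top-level keys; unknown keys are rejected
    const bodyAfter = {
      url: "https://example.com",
      allowExternalLinks: true,
      limit: 100,
      scrapeOptions: {},
    };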
@@ -120,7 +121,7 @@ export const mapRequestSchema = crawlerOptions.extend({
   includeSubdomains: z.boolean().default(true),
   search: z.string().optional(),
   ignoreSitemap: z.boolean().default(false),
-});
+}).strict(strictMessage);
 
 // export type MapRequest = {
 //   url: string;
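
The recurring change from "});" to "}).strict(strictMessage);" is the substance of this file: by default, zod object schemas silently strip unknown keys, while .strict() makes validation fail on them, here with the shared custom message. A minimal sketch of the pattern, with fields abridged from the diff:

    import { z } from "zod";

    const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";

    const scrapeOptions = z.object({
      timeout: z.number().int().positive().finite().safe().default(30000),
      parsePDF: z.boolean().default(true),
    }).strict(strictMessage);

    scrapeOptions.parse({ timeout: 5000 }); // ok: only declared keys

    // Fails with an "unrecognized_keys" issue carrying strictMessage,
    // instead of silently dropping the unknown pageOptions key.
    const result = scrapeOptions.safeParse({ timeout: 5000, pageOptions: {} });
    console.log(result.success); // false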

View File

@@ -1,10 +1,10 @@
+import "dotenv/config";
 import { CustomError } from "../lib/custom-error";
 import {
   getScrapeQueue,
   redisConnection,
   scrapeQueueName,
 } from "./queue-service";
-import "dotenv/config";
 import { logtail } from "./logtail";
 import { startWebScraperPipeline } from "../main/runWebScraper";
 import { callWebhook } from "./webhook";
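
The only change here is import order. ES module side-effect imports execute in source order, so "dotenv/config" (which copies .env values into process.env) now runs before "./queue-service", which presumably reads environment variables at module load time to build redisConnection. A minimal sketch of why the order matters:

    // Side-effect import first: populates process.env from the .env file.
    import "dotenv/config";
    // Safe now: queue-service can read e.g. a Redis URL from process.env
    // as it is evaluated. Imported the other way round, it would see only
    // whatever the shell environment already provided.
    import { redisConnection } from "./queue-service";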