Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl, synced 2025-08-04 21:40:40 +08:00
Merge branch 'v1-webscraper' of https://github.com/mendableai/firecrawl into v1-webscraper
Commit 674adee144
@@ -33,6 +33,8 @@ const url = z.preprocess(
   )
 );
 
+const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";
+
 export const scrapeOptions = z.object({
   formats: z
     .enum([
@@ -53,14 +55,14 @@ export const scrapeOptions = z.object({
   timeout: z.number().int().positive().finite().safe().default(30000), // default?
   waitFor: z.number().int().nonnegative().finite().safe().default(0),
   parsePDF: z.boolean().default(true),
-});
+}).strict(strictMessage);
 
 export type ScrapeOptions = z.infer<typeof scrapeOptions>;
 
 export const scrapeRequestSchema = scrapeOptions.extend({
   url,
   origin: z.string().optional().default("api"),
-});
+}).strict(strictMessage);
 
 // export type ScrapeRequest = {
 //   url: string;
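The recurring change from "});" to "}).strict(strictMessage);" switches each schema from zod's default behavior, which silently strips unrecognized keys, to strict mode, which rejects them with the new shared message. A minimal sketch of the difference (the extraKey field is an invented example, not from the diff):

import { z } from "zod";

const strictMessage = "Unrecognized key in body -- please review the v1 API documentation for request body changes";

// Default zod behavior: unknown keys are stripped and parsing succeeds.
const loose = z.object({ url: z.string().url() });
loose.parse({ url: "https://example.com", extraKey: 1 }); // ok; extraKey is dropped

// Strict variant: the same body now fails validation with the custom message.
const strict = z.object({ url: z.string().url() }).strict(strictMessage);
const result = strict.safeParse({ url: "https://example.com", extraKey: 1 });
if (!result.success) {
  console.log(result.error.issues[0].message); // prints strictMessage
}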
@@ -83,7 +85,7 @@ const crawlerOptions = z.object({
   allowBackwardLinks: z.boolean().default(false), // >> TODO: CHANGE THIS NAME???
   allowExternalLinks: z.boolean().default(false),
   ignoreSitemap: z.boolean().default(true),
-});
+}).strict(strictMessage);
 
 // export type CrawlerOptions = {
 //   includePaths?: string[];
@@ -97,14 +99,13 @@ const crawlerOptions = z.object({
 
 export type CrawlerOptions = z.infer<typeof crawlerOptions>;
 
-export const crawlRequestSchema = z.object({
+export const crawlRequestSchema = crawlerOptions.extend({
   url,
   origin: z.string().optional().default("api"),
-  crawlerOptions: crawlerOptions.default({}), // TODO: Get rid of this
   scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
   webhook: z.string().url().optional(),
-  limit: z.number().default(10000), //
-});
+  limit: z.number().default(10000),
+}).strict(strictMessage);
 
 // export type CrawlRequest = {
 //   url: string;
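Beyond strict mode, this hunk also flattens the crawl request body: crawlRequestSchema now extends crawlerOptions directly instead of nesting it under a crawlerOptions key (the removed field even carried a TODO to get rid of it). Roughly, the accepted body shape changes as follows; the field values are illustrative assumptions:

// Before: crawler settings nested under a crawlerOptions key.
const before = {
  url: "https://example.com",
  crawlerOptions: { allowExternalLinks: true },
  limit: 100,
};

// After: the same settings sit at the top level, and .strict(strictMessage)
// now rejects the old nested key as unrecognized.
const after = {
  url: "https://example.com",
  allowExternalLinks: true,
  limit: 100,
};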
@@ -120,7 +121,7 @@ export const mapRequestSchema = crawlerOptions.extend({
   includeSubdomains: z.boolean().default(true),
   search: z.string().optional(),
   ignoreSitemap: z.boolean().default(false),
-});
+}).strict(strictMessage);
 
 // export type MapRequest = {
 //   url: string;
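Since the exported types are derived with z.infer, the .strict() calls require no type edits: z.infer reflects the parsed output shape either way. A condensed sketch (the enum values are placeholders; the diff truncates the real format list):

import { z } from "zod";

const scrapeOptions = z
  .object({
    formats: z.enum(["markdown", "html"]).array().default(["markdown"]), // placeholder values
    timeout: z.number().int().positive().finite().safe().default(30000),
  })
  .strict();

// Inferred post-parse type, with defaults applied:
// { formats: ("markdown" | "html")[]; timeout: number }
export type ScrapeOptions = z.infer<typeof scrapeOptions>;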
@@ -1,10 +1,10 @@
+import "dotenv/config";
 import { CustomError } from "../lib/custom-error";
 import {
   getScrapeQueue,
   redisConnection,
   scrapeQueueName,
 } from "./queue-service";
-import "dotenv/config";
 import { logtail } from "./logtail";
 import { startWebScraperPipeline } from "../main/runWebScraper";
 import { callWebhook } from "./webhook";
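The second file's change looks cosmetic but affects load order: static imports are evaluated in declaration order, so moving import "dotenv/config" to the top ensures .env values are in process.env before ./queue-service runs its module-level setup (such as building redisConnection). A minimal two-file sketch of the pitfall the reorder avoids; IORedis and REDIS_URL are assumptions, since the diff only shows the import order:

// queue-service.ts (sketch): reads the environment at module load time.
import IORedis from "ioredis";

export const redisConnection = new IORedis(process.env.REDIS_URL ?? "redis://localhost:6379");

// queue-worker.ts (sketch): imports are evaluated top to bottom, so
// "dotenv/config" must run before "./queue-service" is loaded. In the
// old order, redisConnection was created while REDIS_URL was undefined.
import "dotenv/config";
import { redisConnection } from "./queue-service";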