// Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
// Synced 2025-07-09 10:11:49 +08:00
// 70 lines, 1.6 KiB, TypeScript
/**
 * Progress report for a long-running job.
 *
 * NOTE(review): the unit counted by `current`/`total` (pages, URLs, ...) is not
 * visible in this file — confirm against the code that produces these objects.
 */
export interface Progress {
  /** Position reached so far, on the same scale as `total`. */
  current: number;
  /** Upper bound that `current` is measured against. */
  total: number;
  /** Free-form status label; the set of valid values is not defined here. */
  status: string;
  /** Optional bag of extra information; any additional keys are permitted. */
  metadata?: {
    sourceURL?: string;
    [key: string]: any;
  };
  /** Presumably the URL of the document being processed right now — TODO confirm. */
  currentDocumentUrl?: string;
}
|
|
|
|
/**
 * Per-page switches; every field is optional.
 *
 * NOTE(review): the effect and default of each flag are decided by the code
 * that consumes these options, which is not visible in this file.
 */
export type PageOptions = {
  /** Presumably restricts output to the page's main content — TODO confirm. */
  onlyMainContent?: boolean;
  /** Presumably enables a fallback strategy — TODO confirm which one. */
  fallback?: boolean;
  /** Presumably controls whether page content is fetched at all — TODO confirm. */
  fetchPageContent?: boolean;
};
|
|
|
|
/**
 * Optional tuning parameters for a search request.
 *
 * NOTE(review): the accepted formats of `tbs` and `filter` are defined by the
 * search provider, not by this file — confirm before relying on them.
 */
export type SearchOptions = {
  /** Presumably the maximum number of results to return — TODO confirm. */
  limit?: number;
  /** Opaque provider-specific string; semantics are not visible here. */
  tbs?: string;
  /** Opaque filter string; semantics are not visible here. */
  filter?: string;
};
|
|
|
|
/**
 * Configuration for a web-scraping run.
 *
 * `urls` and `mode` are required; everything else is optional.
 *
 * NOTE(review): defaults and exact semantics of the optional fields are
 * decided by the scraper implementation, which is not visible in this file.
 */
export type WebScraperOptions = {
  /** Input URLs for the run. */
  urls: string[];
  /** Which of the three supported strategies to run. */
  mode: "single_urls" | "sitemap" | "crawl";
  /** Settings grouped under the crawler; presumably only relevant to crawling — TODO confirm. */
  crawlerOptions?: {
    returnOnlyUrls?: boolean;
    /** Presumably URL patterns to include — pattern syntax not visible here. */
    includes?: string[];
    /** Presumably URL patterns to exclude — pattern syntax not visible here. */
    excludes?: string[];
    maxCrawledLinks?: number;
    // NOTE(review): how `limit` differs from `maxCrawledLinks` is not visible here.
    limit?: number;
    generateImgAltText?: boolean;
    replaceAllPathsWithAbsolutePaths?: boolean;
  };
  /** Per-page switches applied to pages handled by the run. */
  pageOptions?: PageOptions;
  /** Presumably the number of requests issued in parallel — TODO confirm. */
  concurrentRequests?: number;
};
|
|
|
|
/**
 * A document record with its content and associated metadata.
 */
export class Document {
  id?: string;
  url?: string; // Used only in /search for now
  content: string;
  markdown?: string;
  createdAt?: Date;
  updatedAt?: Date;
  type?: string;
  metadata: {
    sourceURL?: string;
    [key: string]: any;
  };
  childrenLinks?: string[];
  provider?: string;

  /**
   * Builds a document from a partial set of fields, filling in defaults.
   *
   * Defaults applied when a field is absent (or falsy):
   * `createdAt`/`updatedAt` → now, `type` → `"unknown"`,
   * `metadata` → `{ sourceURL: "" }`, `markdown` → `""`,
   * `childrenLinks`/`provider` → `undefined`.
   *
   * @param data - Field values to copy; `content` must be present and non-empty.
   * @throws Error when `data.content` is missing or is the empty string.
   */
  constructor(data: Partial<Document>) {
    // Truthiness check kept on purpose: an empty string is rejected too.
    if (!data.content) {
      throw new Error("Missing required fields");
    }
    this.content = data.content;

    // `id` and `url` are declared fields, so carry them over when the caller
    // supplies them instead of silently dropping them. They are only assigned
    // when defined so that instances built without them keep the same keys.
    if (data.id !== undefined) {
      this.id = data.id;
    }
    if (data.url !== undefined) {
      this.url = data.url;
    }

    this.createdAt = data.createdAt || new Date();
    this.updatedAt = data.updatedAt || new Date();
    this.type = data.type || "unknown";
    this.metadata = data.metadata || { sourceURL: "" };
    this.markdown = data.markdown || "";
    this.childrenLinks = data.childrenLinks || undefined;
    this.provider = data.provider || undefined;
  }
}
|