firecrawl/apps/api/src/lib/entities.ts
2024-04-23 16:45:06 -07:00

69 lines
1.5 KiB
TypeScript

export interface Progress {
current: number;
total: number;
status: string;
metadata?: {
sourceURL?: string;
[key: string]: any;
};
currentDocumentUrl?: string;
}
export type PageOptions = {
onlyMainContent?: boolean;
fallback?: boolean;
fetchPageContent?: boolean;
};
export type SearchOptions = {
limit?: number;
tbs?: string;
filter?: string;
};
export type WebScraperOptions = {
urls: string[];
mode: "single_urls" | "sitemap" | "crawl";
crawlerOptions?: {
returnOnlyUrls?: boolean;
includes?: string[];
excludes?: string[];
maxCrawledLinks?: number;
limit?: number;
generateImgAltText?: boolean;
replaceAllPathsWithAbsolutePaths?: boolean;
};
pageOptions?: PageOptions;
concurrentRequests?: number;
};
export class Document {
id?: string;
content: string;
markdown?: string;
createdAt?: Date;
updatedAt?: Date;
type?: string;
metadata: {
sourceURL?: string;
[key: string]: any;
};
childrenLinks?: string[];
provider?: string;
constructor(data: Partial<Document>) {
if (!data.content) {
throw new Error("Missing required fields");
}
this.content = data.content;
this.createdAt = data.createdAt || new Date();
this.updatedAt = data.updatedAt || new Date();
this.type = data.type || "unknown";
this.metadata = data.metadata || { sourceURL: "" };
this.markdown = data.markdown || "";
this.childrenLinks = data.childrenLinks || undefined;
this.provider = data.provider || undefined;
}
}