mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-11 21:09:00 +08:00
(feat/deep-research-alpha) Added Max Urls, Sources and Fixes (#1271)
* Nick: fixes * Nick: * Update deep-research-status.ts
This commit is contained in:
parent
1d3757b391
commit
289e351c14
@ -32,6 +32,8 @@ export async function deepResearchStatusController(
|
||||
success: research.status === "failed" ? false : true,
|
||||
data: {
|
||||
finalAnalysis: research.finalAnalysis,
|
||||
sources: research.sources,
|
||||
activities: research.activities,
|
||||
// completedSteps: research.completedSteps,
|
||||
// totalSteps: research.totalExpectedSteps,
|
||||
},
|
||||
@ -40,6 +42,7 @@ export async function deepResearchStatusController(
|
||||
currentDepth: research.currentDepth,
|
||||
maxDepth: research.maxDepth,
|
||||
status: research.status,
|
||||
// DO NOT remove - backwards compatibility
|
||||
activities: research.activities,
|
||||
sources: research.sources,
|
||||
// summaries: research.summaries,
|
||||
|
@ -8,6 +8,7 @@ import { z } from "zod";
|
||||
/**
 * Zod schema for the body of a deep-research request.
 *
 * Fields, as validated below:
 * - `topic`: required string, the topic or question to research.
 * - `maxDepth`: number in [1, 10], defaults to 7 research iterations.
 * - `maxUrls`: number in [1, 1000], defaults to 20 URLs to analyze.
 * - `timeLimit`: number in [30, 600] seconds, defaults to 300.
 * - `__experimental_streamSteps`: optional boolean flag.
 */
export const deepResearchRequestSchema = z.object({
|
||||
topic: z.string().describe('The topic or question to research'),
|
||||
maxDepth: z.number().min(1).max(10).default(7).describe('Maximum depth of research iterations'),
|
||||
maxUrls: z.number().min(1).max(1000).default(20).describe('Maximum number of URLs to analyze'),
|
||||
timeLimit: z.number().min(30).max(600).default(300).describe('Time limit in seconds'),
|
||||
__experimental_streamSteps: z.boolean().optional(),
|
||||
});
|
||||
|
@ -89,6 +89,8 @@ export async function updateDeepResearch(
|
||||
: current.summaries
|
||||
};
|
||||
|
||||
|
||||
|
||||
await redisConnection.set("deep-research:" + id, JSON.stringify(updatedResearch));
|
||||
await redisConnection.expire("deep-research:" + id, DEEP_RESEARCH_TTL);
|
||||
}
|
||||
|
@ -13,14 +13,16 @@ interface DeepResearchServiceOptions {
|
||||
plan: string;
|
||||
topic: string;
|
||||
maxDepth: number;
|
||||
maxUrls: number;
|
||||
timeLimit: number;
|
||||
subId?: string;
|
||||
}
|
||||
|
||||
export async function performDeepResearch(options: DeepResearchServiceOptions) {
|
||||
const { researchId, teamId, plan, timeLimit, subId } = options;
|
||||
const { researchId, teamId, plan, timeLimit, subId, maxUrls } = options;
|
||||
const startTime = Date.now();
|
||||
let currentTopic = options.topic;
|
||||
let urlsAnalyzed = 0;
|
||||
|
||||
const logger = _logger.child({
|
||||
module: "deep-research",
|
||||
@ -41,7 +43,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
|
||||
const llmService = new ResearchLLMService(logger);
|
||||
|
||||
try {
|
||||
while (!state.hasReachedMaxDepth()) {
|
||||
while (!state.hasReachedMaxDepth() && urlsAnalyzed < maxUrls) {
|
||||
logger.debug("[Deep Research] Current depth:", state.getCurrentDepth());
|
||||
const timeElapsed = Date.now() - startTime;
|
||||
if (timeElapsed >= timeLimit * 1000) {
|
||||
@ -135,14 +137,22 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
|
||||
}
|
||||
|
||||
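// Filter out already seen URLs and track new ones.
// NOTE(review): the predicate passed to filter() must be synchronous. An `async`
// callback returns a Promise, which is always truthy, so every result would be
// kept (including seen or URL-less ones) even though the body still runs.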
|
||||
const newSearchResults = searchResults.filter((result) => {
|
||||
const newSearchResults = searchResults.filter(async (result) => {
|
||||
if (!result.url || state.hasSeenUrl(result.url)) {
|
||||
return false;
|
||||
}
|
||||
state.addSeenUrl(result.url);
|
||||
|
||||
urlsAnalyzed++;
|
||||
return true;
|
||||
});
|
||||
|
||||
await state.addSources(newSearchResults.map((result) => ({
|
||||
url: result.url ?? "",
|
||||
title: result.title ?? "",
|
||||
description: result.description ?? "",
|
||||
icon: result.metadata?.favicon ?? "",
|
||||
})));
|
||||
logger.debug(
|
||||
"[Deep Research] New unique results count:",
|
||||
{ length: newSearchResults.length },
|
||||
@ -272,7 +282,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
|
||||
success: true,
|
||||
message: "Research completed",
|
||||
num_docs: 1,
|
||||
docs: [{ finalAnalysis: finalAnalysis }],
|
||||
docs: [{ finalAnalysis: finalAnalysis, sources: state.getSources() }],
|
||||
time_taken: (Date.now() - startTime) / 1000,
|
||||
team_id: teamId,
|
||||
mode: "deep-research",
|
||||
@ -281,17 +291,16 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
|
||||
origin: "api",
|
||||
num_tokens: 0,
|
||||
tokens_billed: 0,
|
||||
sources: {},
|
||||
});
|
||||
await updateDeepResearch(researchId, {
|
||||
status: "completed",
|
||||
finalAnalysis: finalAnalysis,
|
||||
});
|
||||
// Bill team for usage
|
||||
billTeam(teamId, subId, state.getFindings().length, logger).catch(
|
||||
// Bill team for usage based on URLs analyzed
|
||||
billTeam(teamId, subId, urlsAnalyzed, logger).catch(
|
||||
(error) => {
|
||||
logger.error(
|
||||
`Failed to bill team ${teamId} for ${state.getFindings().length} findings`, { teamId, count: state.getFindings().length, error },
|
||||
`Failed to bill team ${teamId} for ${urlsAnalyzed} URLs analyzed`, { teamId, count: urlsAnalyzed, error },
|
||||
);
|
||||
},
|
||||
);
|
||||
|
@ -25,7 +25,7 @@ export class ResearchStateManager {
|
||||
private completedSteps: number = 0;
|
||||
private readonly totalExpectedSteps: number;
|
||||
private seenUrls: Set<string> = new Set();
|
||||
|
||||
private sources: DeepResearchSource[] = [];
|
||||
constructor(
|
||||
private readonly researchId: string,
|
||||
private readonly teamId: string,
|
||||
@ -61,9 +61,9 @@ export class ResearchStateManager {
|
||||
});
|
||||
}
|
||||
|
||||
async addSource(source: DeepResearchSource): Promise<void> {
|
||||
async addSources(sources: DeepResearchSource[]): Promise<void> {
|
||||
await updateDeepResearch(this.researchId, {
|
||||
sources: [source],
|
||||
sources: sources,
|
||||
});
|
||||
}
|
||||
|
||||
@ -136,6 +136,10 @@ export class ResearchStateManager {
|
||||
getUrlToSearch(): string {
|
||||
return this.urlToSearch;
|
||||
}
|
||||
|
||||
/**
 * Returns the sources held on this state manager.
 *
 * The internal `sources` array is returned by reference, not copied.
 *
 * NOTE(review): in the hunks visible here, `sources` is initialised to `[]`
 * and `addSources` only forwards its argument to `updateDeepResearch`; no
 * visible line appends to `this.sources`. Confirm it is populated elsewhere,
 * otherwise this getter returns an empty array.
 */
getSources(): DeepResearchSource[] {
|
||||
return this.sources;
|
||||
}
|
||||
}
|
||||
|
||||
export class ResearchLLMService {
|
||||
@ -254,17 +258,12 @@ export class ResearchLLMService {
|
||||
logger: this.logger.child({
|
||||
method: "generateFinalAnalysis",
|
||||
}),
|
||||
mode: "no-object",
|
||||
options: {
|
||||
mode: "llm",
|
||||
systemPrompt:
|
||||
"You are an expert research analyst who creates comprehensive, well-structured reports. Your reports are detailed, properly formatted in Markdown, and include clear sections with citations. Today's date is " +
|
||||
new Date().toISOString().split("T")[0],
|
||||
schema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
report: { type: "string" },
|
||||
},
|
||||
},
|
||||
prompt: trimToTokenLimit(
|
||||
`Create a comprehensive research report on "${topic}" based on the collected findings and analysis.
|
||||
|
||||
@ -285,6 +284,6 @@ export class ResearchLLMService {
|
||||
markdown: "",
|
||||
});
|
||||
|
||||
return extract.report;
|
||||
return extract;
|
||||
}
|
||||
}
|
||||
|
@ -156,6 +156,7 @@ export async function generateCompletions({
|
||||
previousWarning,
|
||||
isExtractEndpoint,
|
||||
model = getModel("gpt-4o-mini"),
|
||||
mode = "object",
|
||||
}: {
|
||||
model?: LanguageModel;
|
||||
logger: Logger;
|
||||
@ -163,6 +164,7 @@ export async function generateCompletions({
|
||||
markdown?: string;
|
||||
previousWarning?: string;
|
||||
isExtractEndpoint?: boolean;
|
||||
mode?: "object" | "no-object";
|
||||
}): Promise<{
|
||||
extract: any;
|
||||
numTokens: number;
|
||||
@ -192,44 +194,67 @@ export async function generateCompletions({
|
||||
markdown = trimmedMarkdown;
|
||||
warning = trimWarning;
|
||||
|
||||
let schema = options.schema;
|
||||
// Normalize the bad json schema users write (mogery)
|
||||
if (schema && !(schema instanceof z.ZodType)) {
|
||||
// let schema = options.schema;
|
||||
if (schema) {
|
||||
schema = removeDefaultProperty(schema);
|
||||
}
|
||||
|
||||
if (schema && schema.type === "array") {
|
||||
schema = {
|
||||
type: "object",
|
||||
properties: {
|
||||
items: options.schema,
|
||||
},
|
||||
required: ["items"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
} else if (schema && typeof schema === "object" && !schema.type) {
|
||||
schema = {
|
||||
type: "object",
|
||||
properties: Object.fromEntries(
|
||||
Object.entries(schema).map(([key, value]) => {
|
||||
return [key, removeDefaultProperty(value)];
|
||||
}),
|
||||
),
|
||||
required: Object.keys(schema),
|
||||
additionalProperties: false,
|
||||
};
|
||||
}
|
||||
|
||||
schema = normalizeSchema(schema);
|
||||
}
|
||||
|
||||
try {
|
||||
const prompt = options.prompt !== undefined
|
||||
? `Transform the following content into structured JSON output based on the provided schema and this user request: ${options.prompt}. If schema is provided, strictly follow it.\n\n${markdown}`
|
||||
: `Transform the following content into structured JSON output based on the provided schema if any.\n\n${markdown}`;
|
||||
|
||||
if (mode === "no-object") {
|
||||
const result = await generateText({
|
||||
model: model,
|
||||
prompt: options.prompt + (markdown ? `\n\nData:${markdown}` : ""),
|
||||
temperature: options.temperature ?? 0,
|
||||
system: options.systemPrompt,
|
||||
});
|
||||
|
||||
extract = result.text;
|
||||
|
||||
return {
|
||||
extract,
|
||||
warning,
|
||||
numTokens,
|
||||
totalUsage: {
|
||||
promptTokens: numTokens,
|
||||
completionTokens: result.usage?.completionTokens ?? 0,
|
||||
totalTokens: numTokens + (result.usage?.completionTokens ?? 0),
|
||||
},
|
||||
model: model.modelId,
|
||||
};
|
||||
}
|
||||
|
||||
let schema = options.schema;
|
||||
// Normalize the bad json schema users write (mogery)
|
||||
if (schema && !(schema instanceof z.ZodType)) {
|
||||
// let schema = options.schema;
|
||||
if (schema) {
|
||||
schema = removeDefaultProperty(schema);
|
||||
}
|
||||
|
||||
if (schema && schema.type === "array") {
|
||||
schema = {
|
||||
type: "object",
|
||||
properties: {
|
||||
items: options.schema,
|
||||
},
|
||||
required: ["items"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
} else if (schema && typeof schema === "object" && !schema.type) {
|
||||
schema = {
|
||||
type: "object",
|
||||
properties: Object.fromEntries(
|
||||
Object.entries(schema).map(([key, value]) => {
|
||||
return [key, removeDefaultProperty(value)];
|
||||
}),
|
||||
),
|
||||
required: Object.keys(schema),
|
||||
additionalProperties: false,
|
||||
};
|
||||
}
|
||||
|
||||
schema = normalizeSchema(schema);
|
||||
}
|
||||
|
||||
const repairConfig = {
|
||||
experimental_repairText: async ({ text, error }) => {
|
||||
const { text: fixedText } = await generateText({
|
||||
@ -241,7 +266,6 @@ export async function generateCompletions({
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
const generateObjectConfig = {
|
||||
model: model,
|
||||
prompt: prompt,
|
||||
|
@ -410,6 +410,7 @@ const processDeepResearchJobInternal = async (
|
||||
maxDepth: job.data.request.maxDepth,
|
||||
timeLimit: job.data.request.timeLimit,
|
||||
subId: job.data.subId,
|
||||
maxUrls: job.data.request.maxUrls,
|
||||
});
|
||||
|
||||
if(result.success) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mendable/firecrawl-js",
|
||||
"version": "1.18.3-beta.1",
|
||||
"version": "1.18.4",
|
||||
"description": "JavaScript SDK for Firecrawl API",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
|
@ -364,6 +364,11 @@ export interface DeepResearchParams {
|
||||
* @default 270
|
||||
*/
|
||||
timeLimit?: number;
|
||||
/**
|
||||
* Maximum number of URLs to analyze (1-1000)
|
||||
* @default 20
|
||||
*/
|
||||
maxUrls?: number;
|
||||
/**
|
||||
* Experimental flag for streaming steps
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user