diff --git a/apps/api/src/controllers/v1/deep-research-status.ts b/apps/api/src/controllers/v1/deep-research-status.ts index 43e99630..06fff76c 100644 --- a/apps/api/src/controllers/v1/deep-research-status.ts +++ b/apps/api/src/controllers/v1/deep-research-status.ts @@ -32,6 +32,8 @@ export async function deepResearchStatusController( success: research.status === "failed" ? false : true, data: { finalAnalysis: research.finalAnalysis, + sources: research.sources, + activities: research.activities, // completedSteps: research.completedSteps, // totalSteps: research.totalExpectedSteps, }, @@ -40,6 +42,7 @@ export async function deepResearchStatusController( currentDepth: research.currentDepth, maxDepth: research.maxDepth, status: research.status, + // DO NOT remove - backwards compatibility activities: research.activities, sources: research.sources, // summaries: research.summaries, diff --git a/apps/api/src/controllers/v1/deep-research.ts b/apps/api/src/controllers/v1/deep-research.ts index f5d7b072..e26715de 100644 --- a/apps/api/src/controllers/v1/deep-research.ts +++ b/apps/api/src/controllers/v1/deep-research.ts @@ -8,6 +8,7 @@ import { z } from "zod"; export const deepResearchRequestSchema = z.object({ topic: z.string().describe('The topic or question to research'), maxDepth: z.number().min(1).max(10).default(7).describe('Maximum depth of research iterations'), + maxUrls: z.number().min(1).max(1000).default(20).describe('Maximum number of URLs to analyze'), timeLimit: z.number().min(30).max(600).default(300).describe('Time limit in seconds'), __experimental_streamSteps: z.boolean().optional(), }); diff --git a/apps/api/src/lib/deep-research/deep-research-redis.ts b/apps/api/src/lib/deep-research/deep-research-redis.ts index efe757f6..8b1d9c7a 100644 --- a/apps/api/src/lib/deep-research/deep-research-redis.ts +++ b/apps/api/src/lib/deep-research/deep-research-redis.ts @@ -89,6 +89,8 @@ export async function updateDeepResearch( : current.summaries }; + + await 
redisConnection.set("deep-research:" + id, JSON.stringify(updatedResearch)); await redisConnection.expire("deep-research:" + id, DEEP_RESEARCH_TTL); } diff --git a/apps/api/src/lib/deep-research/deep-research-service.ts b/apps/api/src/lib/deep-research/deep-research-service.ts index 7e19fc98..7de4609c 100644 --- a/apps/api/src/lib/deep-research/deep-research-service.ts +++ b/apps/api/src/lib/deep-research/deep-research-service.ts @@ -13,14 +13,16 @@ interface DeepResearchServiceOptions { plan: string; topic: string; maxDepth: number; + maxUrls: number; timeLimit: number; subId?: string; } export async function performDeepResearch(options: DeepResearchServiceOptions) { - const { researchId, teamId, plan, timeLimit, subId } = options; + const { researchId, teamId, plan, timeLimit, subId, maxUrls } = options; const startTime = Date.now(); let currentTopic = options.topic; + let urlsAnalyzed = 0; const logger = _logger.child({ module: "deep-research", @@ -41,7 +43,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) { const llmService = new ResearchLLMService(logger); try { - while (!state.hasReachedMaxDepth()) { + while (!state.hasReachedMaxDepth() && urlsAnalyzed < maxUrls) { logger.debug("[Deep Research] Current depth:", state.getCurrentDepth()); const timeElapsed = Date.now() - startTime; if (timeElapsed >= timeLimit * 1000) { @@ -135,14 +137,22 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) { } // Filter out already seen URLs and track new ones const newSearchResults = searchResults.filter((result) => { if (!result.url || state.hasSeenUrl(result.url)) { return false; } state.addSeenUrl(result.url); + + urlsAnalyzed++; return true; }); + await state.addSources(newSearchResults.map((result) => ({ + url: result.url ?? "", + title: result.title ?? "", + description: result.description ?? "", + icon: result.metadata?.favicon ?? 
"", + }))); logger.debug( "[Deep Research] New unique results count:", { length: newSearchResults.length }, @@ -272,7 +282,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) { success: true, message: "Research completed", num_docs: 1, - docs: [{ finalAnalysis: finalAnalysis }], + docs: [{ finalAnalysis: finalAnalysis, sources: state.getSources() }], time_taken: (Date.now() - startTime) / 1000, team_id: teamId, mode: "deep-research", @@ -281,17 +291,16 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) { origin: "api", num_tokens: 0, tokens_billed: 0, - sources: {}, }); await updateDeepResearch(researchId, { status: "completed", finalAnalysis: finalAnalysis, }); - // Bill team for usage - billTeam(teamId, subId, state.getFindings().length, logger).catch( + // Bill team for usage based on URLs analyzed + billTeam(teamId, subId, urlsAnalyzed, logger).catch( (error) => { logger.error( - `Failed to bill team ${teamId} for ${state.getFindings().length} findings`, { teamId, count: state.getFindings().length, error }, + `Failed to bill team ${teamId} for ${urlsAnalyzed} URLs analyzed`, { teamId, count: urlsAnalyzed, error }, ); }, ); diff --git a/apps/api/src/lib/deep-research/research-manager.ts b/apps/api/src/lib/deep-research/research-manager.ts index 61d5bd34..8a5bf839 100644 --- a/apps/api/src/lib/deep-research/research-manager.ts +++ b/apps/api/src/lib/deep-research/research-manager.ts @@ -25,7 +25,7 @@ export class ResearchStateManager { private completedSteps: number = 0; private readonly totalExpectedSteps: number; private seenUrls: Set = new Set(); - + private sources: DeepResearchSource[] = []; constructor( private readonly researchId: string, private readonly teamId: string, @@ -61,9 +61,11 @@ export class ResearchStateManager { }); } - async addSource(source: DeepResearchSource): Promise { + async addSources(sources: DeepResearchSource[]): Promise { await updateDeepResearch(this.researchId, { - 
sources: [source], + sources: sources, }); + // Mirror the persisted sources in memory so getSources() can return them + this.sources.push(...sources); } @@ -136,6 +138,10 @@ export class ResearchStateManager { getUrlToSearch(): string { return this.urlToSearch; } + + getSources(): DeepResearchSource[] { + return this.sources; + } } export class ResearchLLMService { @@ -254,17 +260,12 @@ export class ResearchLLMService { logger: this.logger.child({ method: "generateFinalAnalysis", }), + mode: "no-object", options: { mode: "llm", systemPrompt: "You are an expert research analyst who creates comprehensive, well-structured reports. Your reports are detailed, properly formatted in Markdown, and include clear sections with citations. Today's date is " + new Date().toISOString().split("T")[0], - schema: { - type: "object", - properties: { - report: { type: "string" }, - }, - }, prompt: trimToTokenLimit( `Create a comprehensive research report on "${topic}" based on the collected findings and analysis. @@ -285,6 +286,6 @@ export class ResearchLLMService { markdown: "", }); - return extract.report; + return extract; } } diff --git a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts index 29bf4f6f..e4cdd5a8 100644 --- a/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts +++ b/apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts @@ -156,6 +156,7 @@ export async function generateCompletions({ previousWarning, isExtractEndpoint, model = getModel("gpt-4o-mini"), + mode = "object", }: { model?: LanguageModel; logger: Logger; @@ -163,6 +164,7 @@ export async function generateCompletions({ markdown?: string; previousWarning?: string; isExtractEndpoint?: boolean; + mode?: "object" | "no-object"; }): Promise<{ extract: any; numTokens: number; @@ -192,44 +194,67 @@ export async function generateCompletions({ markdown = trimmedMarkdown; warning = trimWarning; - let schema = options.schema; - // Normalize the bad json 
schema users write (mogery) - if (schema && !(schema instanceof z.ZodType)) { - // let schema = 
options.schema; - if (schema) { - schema = removeDefaultProperty(schema); - } - - if (schema && schema.type === "array") { - schema = { - type: "object", - properties: { - items: options.schema, - }, - required: ["items"], - additionalProperties: false, - }; - } else if (schema && typeof schema === "object" && !schema.type) { - schema = { - type: "object", - properties: Object.fromEntries( - Object.entries(schema).map(([key, value]) => { - return [key, removeDefaultProperty(value)]; - }), - ), - required: Object.keys(schema), - additionalProperties: false, - }; - } - - schema = normalizeSchema(schema); - } - try { const prompt = options.prompt !== undefined ? `Transform the following content into structured JSON output based on the provided schema and this user request: ${options.prompt}. If schema is provided, strictly follow it.\n\n${markdown}` : `Transform the following content into structured JSON output based on the provided schema if any.\n\n${markdown}`; + if (mode === "no-object") { + const result = await generateText({ + model: model, + prompt: (options.prompt ?? "") + (markdown ? `\n\nData:${markdown}` : ""), + temperature: options.temperature ?? 0, + system: options.systemPrompt, + }); + + extract = result.text; + + return { + extract, + warning, + numTokens, + totalUsage: { + promptTokens: numTokens, + completionTokens: result.usage?.completionTokens ?? 0, + totalTokens: numTokens + (result.usage?.completionTokens ?? 
0), + }, + model: model.modelId, + }; + } + + let schema = options.schema; + // Normalize the bad json schema users write (mogery) + if (schema && !(schema instanceof z.ZodType)) { + // let schema = options.schema; + if (schema) { + schema = removeDefaultProperty(schema); + } + + if (schema && schema.type === "array") { + schema = { + type: "object", + properties: { + items: options.schema, + }, + required: ["items"], + additionalProperties: false, + }; + } else if (schema && typeof schema === "object" && !schema.type) { + schema = { + type: "object", + properties: Object.fromEntries( + Object.entries(schema).map(([key, value]) => { + return [key, removeDefaultProperty(value)]; + }), + ), + required: Object.keys(schema), + additionalProperties: false, + }; + } + + schema = normalizeSchema(schema); + } + const repairConfig = { experimental_repairText: async ({ text, error }) => { const { text: fixedText } = await generateText({ @@ -241,7 +266,6 @@ export async function generateCompletions({ } }; - const generateObjectConfig = { model: model, prompt: prompt, diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts index cb967b57..03a9d52f 100644 --- a/apps/api/src/services/queue-worker.ts +++ b/apps/api/src/services/queue-worker.ts @@ -410,6 +410,7 @@ const processDeepResearchJobInternal = async ( maxDepth: job.data.request.maxDepth, timeLimit: job.data.request.timeLimit, subId: job.data.subId, + maxUrls: job.data.request.maxUrls, }); if(result.success) { diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index e3d8b825..23f8bc6d 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.18.3-beta.1", + "version": "1.18.4", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts 
b/apps/js-sdk/firecrawl/src/index.ts index 440b12e5..afb0b4e0 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -364,6 +364,11 @@ export interface DeepResearchParams { * @default 270 */ timeLimit?: number; + /** + * Maximum number of URLs to analyze (1-1000) + * @default 20 + */ + maxUrls?: number; /** * Experimental flag for streaming steps */