Mirror of https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl (synced 2025-08-01 04:12:00 +08:00)
(feat/deep-research) Deep Research Alpha v1 - Structured Outputs + Customizability (#1365)
* Nick: structured output support
* Nick: support for zod and pydantic
This commit is contained in:
parent 3ee58f7a9e
commit a50dc106ef
@@ -37,6 +37,7 @@ export async function deepResearchStatusController(
     finalAnalysis: research.finalAnalysis,
     sources: research.sources,
     activities: research.activities,
+    json: research.json,
     // completedSteps: research.completedSteps,
     // totalSteps: research.totalExpectedSteps,
   },
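For context, a minimal sketch of what a completed status response might look like with the new field. Field names follow the hunk above; the wrapper shape and all example values are assumptions, not the exact API contract:

// Hypothetical completed-job response; `json` is populated only when the
// request asked for the 'json' format.
const exampleStatusResponse = {
  success: true,
  status: "completed",
  data: {
    finalAnalysis: "# Research Report\n...",          // markdown report
    json: { marketSize: "...", keyPlayers: ["..."] }, // shaped by jsonOptions.schema
    sources: [{ url: "https://example.com/article", title: "..." }],
    activities: [{ type: "search", status: "complete", message: "...", timestamp: "...", depth: 1 }],
  },
};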
@@ -1,5 +1,5 @@
 import { Request, Response } from "express";
-import { RequestWithAuth } from "./types";
+import { extractOptions, RequestWithAuth } from "./types";
 import { getDeepResearchQueue } from "../../services/queue-service";
 import * as Sentry from "@sentry/node";
 import { saveDeepResearch } from "../../lib/deep-research/deep-research-redis";
@@ -11,10 +11,19 @@ export const deepResearchRequestSchema = z.object({
   maxUrls: z.number().min(1).max(1000).default(20).describe('Maximum number of URLs to analyze'),
   timeLimit: z.number().min(30).max(600).default(300).describe('Time limit in seconds'),
   analysisPrompt: z.string().describe('The prompt to use for the final analysis').optional(),
+  systemPrompt: z.string().describe('The system prompt to use for the research agent').optional(),
+  formats: z.array(z.enum(['markdown', 'json'])).default(['markdown']),
   // @deprecated Use query instead
   topic: z.string().describe('The topic or question to research').optional(),
+  jsonOptions: extractOptions.optional(),
 }).refine(data => data.query || data.topic, {
   message: "Either query or topic must be provided"
+}).refine((obj) => {
+  const hasJsonFormat = obj.formats?.includes("json");
+  const hasJsonOptions = obj.jsonOptions !== undefined;
+  return (hasJsonFormat && hasJsonOptions) || (!hasJsonFormat && !hasJsonOptions);
+}, {
+  message: "When 'json' format is specified, jsonOptions must be provided, and vice versa"
 }).transform(data => ({
   ...data,
   query: data.topic || data.query // Use topic as query if provided
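The two refinements above compose: the first enforces that a query is present, the second makes the 'json' format and jsonOptions an all-or-nothing pair. A minimal sketch of payloads against deepResearchRequestSchema (example values are hypothetical):

// Passes both refinements: 'json' format and jsonOptions travel together.
const valid = {
  query: "state of open-source LLMs in 2025",
  formats: ["markdown", "json"],
  jsonOptions: { schema: { type: "object", properties: { summary: { type: "string" } } } },
};

// Fails the second refinement: 'json' format without jsonOptions.
const invalid = {
  query: "state of open-source LLMs in 2025",
  formats: ["json"],
};
// deepResearchRequestSchema.parse(invalid) throws:
// "When 'json' format is specified, jsonOptions must be provided, and vice versa"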
@@ -45,6 +45,7 @@ export type StoredDeepResearch = {
   activities: DeepResearchActivity[];
   summaries: string[];
   finalAnalysis?: string;
+  json?: any;
 };

 // TTL of 6 hours
@@ -5,6 +5,7 @@ import { searchAndScrapeSearchResult } from "../../controllers/v1/search";
 import { ResearchLLMService, ResearchStateManager } from "./research-manager";
 import { logJob } from "../../services/logging/log_job";
 import { billTeam } from "../../services/billing/credit_billing";
+import { ExtractOptions } from "../../controllers/v1/types";

 interface DeepResearchServiceOptions {
   researchId: string;
@@ -15,6 +16,9 @@ interface DeepResearchServiceOptions {
   maxUrls: number;
   timeLimit: number;
   analysisPrompt: string;
+  systemPrompt: string;
+  formats: string[];
+  jsonOptions: ExtractOptions;
   subId?: string;
 }

@@ -54,13 +58,13 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
     await state.incrementDepth();

     // Search phase
-    await state.addActivity({
+    await state.addActivity([{
       type: "search",
       status: "processing",
       message: `Generating deeper search queries for "${currentTopic}"`,
       timestamp: new Date().toISOString(),
       depth: state.getCurrentDepth(),
-    });
+    }]);

     const nextSearchTopic = state.getNextSearchTopic();
     logger.debug("[Deep Research] Next search topic:", { nextSearchTopic });
@@ -74,23 +78,23 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {

     logger.debug("[Deep Research] Generated search queries:", { searchQueries });

-    await state.addActivity({
+    await state.addActivity([{
       type: "search",
       status: "processing",
       message: `Starting ${searchQueries.length} parallel searches for "${currentTopic}"`,
       timestamp: new Date().toISOString(),
       depth: state.getCurrentDepth(),
-    });
+    }]);
+    await state.addActivity(searchQueries.map(searchQuery => ({
+      type: "search",
+      status: "processing",
+      message: `Searching for "${searchQuery.query}" - Goal: ${searchQuery.researchGoal}`,
+      timestamp: new Date().toISOString(),
+      depth: state.getCurrentDepth(),
+    })))

     // Run all searches in parallel
     const searchPromises = searchQueries.map(async (searchQuery) => {
-      await state.addActivity({
-        type: "search",
-        status: "processing",
-        message: `Searching for "${searchQuery.query}" - Goal: ${searchQuery.researchGoal}`,
-        timestamp: new Date().toISOString(),
-        depth: state.getCurrentDepth(),
-      });

       const response = await searchAndScrapeSearchResult(searchQuery.query, {
         teamId: options.teamId,
@@ -126,13 +130,13 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
         "[Deep Research] No results found for topic:",
         { currentTopic },
       );
-      await state.addActivity({
+      await state.addActivity([{
         type: "search",
         status: "error",
         message: `No results found for any queries about "${currentTopic}"`,
         timestamp: new Date().toISOString(),
         depth: state.getCurrentDepth(),
-      });
+      }]);
       continue;
     }

@@ -163,23 +167,23 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
         "[Deep Research] No new unique results found for topic:",
         { currentTopic },
       );
-      await state.addActivity({
+      await state.addActivity([{
         type: "search",
         status: "error",
         message: `Found ${searchResults.length} results but all URLs were already processed for "${currentTopic}"`,
         timestamp: new Date().toISOString(),
         depth: state.getCurrentDepth(),
-      });
+      }]);
       continue;
     }

-    await state.addActivity({
+    await state.addActivity([{
       type: "search",
       status: "complete",
       message: `Found ${newSearchResults.length} new relevant results across ${searchQueries.length} parallel queries`,
       timestamp: new Date().toISOString(),
       depth: state.getCurrentDepth(),
-    });
+    }]);

     await state.addFindings(
       newSearchResults.map((result) => ({
@@ -189,13 +193,13 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
     );

     // Analysis phase
-    await state.addActivity({
+    await state.addActivity([{
       type: "analyze",
       status: "processing",
       message: "Analyzing findings and planning next steps",
       timestamp: new Date().toISOString(),
       depth: state.getCurrentDepth(),
-    });
+    }]);

     const timeRemaining = timeLimit * 1000 - (Date.now() - startTime);
     logger.debug("[Deep Research] Time remaining (ms):", { timeRemaining });
@@ -204,17 +208,18 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
       state.getFindings(),
       currentTopic,
       timeRemaining,
+      options.systemPrompt ?? "",
     );

     if (!analysis) {
       logger.debug("[Deep Research] Analysis failed");
-      await state.addActivity({
+      await state.addActivity([{
         type: "analyze",
         status: "error",
         message: "Failed to analyze findings",
         timestamp: new Date().toISOString(),
         depth: state.getCurrentDepth(),
-      });
+      }]);

       state.incrementFailedAttempts();
       if (state.hasReachedMaxFailedAttempts()) {
@@ -232,13 +237,13 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {

     state.setNextSearchTopic(analysis.nextSearchTopic || "");

-    await state.addActivity({
+    await state.addActivity([{
       type: "analyze",
       status: "complete",
       message: "Analyzed findings",
       timestamp: new Date().toISOString(),
       depth: state.getCurrentDepth(),
-    });
+    }]);

     if (!analysis.shouldContinue || analysis.gaps.length === 0) {
       logger.debug("[Deep Research] No more gaps to research, ending search");
@@ -251,28 +256,42 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {

   // Final synthesis
   logger.debug("[Deep Research] Starting final synthesis");
-  await state.addActivity({
+  await state.addActivity([{
     type: "synthesis",
     status: "processing",
     message: "Preparing final analysis",
     timestamp: new Date().toISOString(),
     depth: state.getCurrentDepth(),
-  });
+  }]);

-  const finalAnalysis = await llmService.generateFinalAnalysis(
-    options.query,
-    state.getFindings(),
-    state.getSummaries(),
-    options.analysisPrompt,
-  );
+  let finalAnalysis = "";
+  let finalAnalysisJson = null;
+  if(options.formats.includes('json')) {
+    finalAnalysisJson = await llmService.generateFinalAnalysis(
+      options.query,
+      state.getFindings(),
+      state.getSummaries(),
+      options.analysisPrompt,
+      options.formats,
+      options.jsonOptions,
+    );
+  }
+  if(options.formats.includes('markdown')) {
+    finalAnalysis = await llmService.generateFinalAnalysis(
+      options.query,
+      state.getFindings(),
+      state.getSummaries(),
+      options.analysisPrompt,
+    );
+  }

-  await state.addActivity({
+  await state.addActivity([{
     type: "synthesis",
     status: "complete",
     message: "Research completed",
     timestamp: new Date().toISOString(),
     depth: state.getCurrentDepth(),
-  });
+  }]);

   const progress = state.getProgress();
   logger.debug("[Deep Research] Research completed successfully");
@@ -283,7 +302,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
     success: true,
     message: "Research completed",
     num_docs: 1,
-    docs: [{ finalAnalysis: finalAnalysis, sources: state.getSources() }],
+    docs: [{ finalAnalysis: finalAnalysis, sources: state.getSources(), json: finalAnalysisJson }],
     time_taken: (Date.now() - startTime) / 1000,
     team_id: teamId,
     mode: "deep-research",
@@ -296,6 +315,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
   await updateDeepResearch(researchId, {
     status: "completed",
     finalAnalysis: finalAnalysis,
+    json: finalAnalysisJson,
   });
   // Bill team for usage based on URLs analyzed
   billTeam(teamId, subId, Math.min(urlsAnalyzed, options.maxUrls), logger).catch(
@@ -310,6 +330,7 @@ export async function performDeepResearch(options: DeepResearchServiceOptions) {
     data: {
       finalAnalysis: finalAnalysis,
       sources: state.getSources(),
+      json: finalAnalysisJson,
     },
   };
 } catch (error: any) {
@@ -6,7 +6,7 @@ import {
   updateDeepResearch,
 } from "./deep-research-redis";
 import { generateCompletions, trimToTokenLimit } from "../../scraper/scrapeURL/transformers/llmExtract";
+import { ExtractOptions } from "../../controllers/v1/types";
 interface AnalysisResult {
   gaps: string[];
   nextSteps: string[];
@@ -50,13 +50,13 @@ export class ResearchStateManager {
     return this.seenUrls;
   }

-  async addActivity(activity: DeepResearchActivity): Promise<void> {
-    if (activity.status === "complete") {
+  async addActivity(activities: DeepResearchActivity[]): Promise<void> {
+    if (activities.some(activity => activity.status === "complete")) {
       this.completedSteps++;
     }

     await updateDeepResearch(this.researchId, {
-      activities: [activity],
+      activities: activities,
       completedSteps: this.completedSteps,
     });
   }
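The signature change above turns per-activity writes into batched writes, matching the array-valued addActivity calls in the service. One consequence worth noting: completedSteps now increments at most once per batch (via some), not once per completed entry. A sketch of the new call shape, with illustrative messages:

// Two completed activities written in a single updateDeepResearch round trip;
// completedSteps increases by 1, not 2, because of the `some` check.
await state.addActivity([
  { type: "search", status: "complete", message: "query A done", timestamp: new Date().toISOString(), depth: 2 },
  { type: "search", status: "complete", message: "query B done", timestamp: new Date().toISOString(), depth: 2 },
]);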
@@ -199,6 +199,7 @@ export class ResearchLLMService {
     findings: DeepResearchFinding[],
     currentTopic: string,
     timeRemaining: number,
+    systemPrompt: string,
   ): Promise<AnalysisResult | null> {
     try {
       const timeRemainingMinutes =
@@ -211,6 +212,7 @@ export class ResearchLLMService {
       options: {
         mode: "llm",
         systemPrompt:
+          systemPrompt +
           "You are an expert research agent that is analyzing findings. Your goal is to synthesize information and identify gaps for further research. Today's date is " +
           new Date().toISOString().split("T")[0],
         schema: {
@@ -254,33 +256,48 @@ export class ResearchLLMService {
     findings: DeepResearchFinding[],
     summaries: string[],
     analysisPrompt: string,
-  ): Promise<string> {
+    formats?: string[],
+    jsonOptions?: ExtractOptions,
+  ): Promise<any> {
+    if(!formats) {
+      formats = ['markdown'];
+    }
+    if(!jsonOptions) {
+      jsonOptions = undefined;
+    }
+
     const { extract } = await generateCompletions({
       logger: this.logger.child({
         method: "generateFinalAnalysis",
       }),
-      mode: "no-object",
+      mode: formats.includes('json') ? 'object' : 'no-object',
       options: {
         mode: "llm",
-        systemPrompt:
-          "You are an expert research analyst who creates comprehensive, well-structured reports. Your reports are detailed, properly formatted in Markdown, and include clear sections with citations. Today's date is " +
-          new Date().toISOString().split("T")[0],
+        ...(formats.includes('json') && {
+          ...jsonOptions
+        }),
+        systemPrompt: formats.includes('json')
+          ? "You are an expert research analyst who creates comprehensive, structured analysis following the provided JSON schema exactly."
+          : "You are an expert research analyst who creates comprehensive, well-structured reports. Your reports are detailed, properly formatted in Markdown, and include clear sections with citations. Today's date is " +
+            new Date().toISOString().split("T")[0],
         prompt: trimToTokenLimit(
           analysisPrompt
             ? `${analysisPrompt}\n\nResearch data:\n${findings.map((f) => `[From ${f.source}]: ${f.text}`).join("\n")}`
-            : `Create a comprehensive research report on "${topic}" based on the collected findings and analysis.
+            : formats.includes('json')
+              ? `Analyze the following research data on "${topic}" and structure the output according to the provided schema: Schema: ${JSON.stringify(jsonOptions?.schema)}\n\nFindings:\n\n${findings.map((f) => `[From ${f.source}]: ${f.text}`).join("\n")}`
+              : `Create a comprehensive research report on "${topic}" based on the collected findings and analysis.

 Research data:
 ${findings.map((f) => `[From ${f.source}]: ${f.text}`).join("\n")}

 Requirements:
 - Format the report in Markdown with proper headers and sections
 - Include specific citations to sources where appropriate
 - Provide detailed analysis in each section
 - Make it comprehensive and thorough (aim for 4+ pages worth of content)
 - Include all relevant findings and insights from the research
 - Cite sources
 - Use bullet points and lists where appropriate for readability`,
           100000,
         ).text,
       },
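The `...(formats.includes('json') && { ...jsonOptions })` line relies on a JavaScript detail: spreading `false` into an object literal is a no-op, so jsonOptions is merged into the completion options only when the 'json' format was requested. In isolation (variable names here are illustrative):

const jsonRequested = formats.includes('json');
const options = {
  mode: "llm",
  // `false` spreads to nothing; an object spreads its properties.
  ...(jsonRequested && { schema: jsonOptions?.schema, prompt: jsonOptions?.prompt }),
};
// jsonRequested === true  -> { mode: "llm", schema: ..., prompt: ... }
// jsonRequested === false -> { mode: "llm" }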
@@ -413,6 +413,9 @@ const processDeepResearchJobInternal = async (
     subId: job.data.subId,
     maxUrls: job.data.request.maxUrls,
     analysisPrompt: job.data.request.analysisPrompt,
+    systemPrompt: job.data.request.systemPrompt,
+    formats: job.data.request.formats,
+    jsonOptions: job.data.request.jsonOptions,
   });

   if(result.success) {
@@ -356,7 +356,7 @@ export interface CrawlErrorsResponse {
  * Parameters for deep research operations.
  * Defines options for conducting deep research on a query.
  */
-export interface DeepResearchParams {
+export interface DeepResearchParams<LLMSchema extends zt.ZodSchema = any> {
   /**
    * Maximum depth of research iterations (1-10)
    * @default 7
@@ -377,9 +377,25 @@ export interface DeepResearchParams {
    */
   analysisPrompt?: string;
   /**
+   * The system prompt to use for the research agent
+   */
+  systemPrompt?: string;
+  /**
+   * The formats to use for the final analysis
+   */
+  formats?: ("markdown" | "json")[];
+  /**
+   * The JSON options to use for the final analysis
+   */
+  jsonOptions?:{
+    prompt?: string;
+    schema?: LLMSchema;
+    systemPrompt?: string;
+  };
+  /**
    * Experimental flag for streaming steps
    */
-  __experimental_streamSteps?: boolean;
+  // __experimental_streamSteps?: boolean;
 }

 /**
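With the generic parameter in place, a caller-side params object might look like this. A sketch only: the schema fields and values are illustrative, not part of the SDK:

import { z } from "zod";

const params: DeepResearchParams<z.ZodSchema> = {
  maxUrls: 15,
  timeLimit: 120,
  formats: ["markdown", "json"],
  systemPrompt: "You are a cautious market-research analyst.",
  jsonOptions: {
    // Carried through the LLMSchema type parameter above.
    schema: z.object({
      keyFindings: z.array(z.string()),
      confidence: z.number().min(0).max(1),
    }),
    prompt: "Condense the research into key findings with a confidence score.",
  },
};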
/**
|
/**
|
||||||
@ -1420,7 +1436,7 @@ export default class FirecrawlApp {
|
|||||||
*/
|
*/
|
||||||
async deepResearch(
|
async deepResearch(
|
||||||
query: string,
|
query: string,
|
||||||
params: DeepResearchParams,
|
params: DeepResearchParams<zt.ZodSchema>,
|
||||||
onActivity?: (activity: {
|
onActivity?: (activity: {
|
||||||
type: string;
|
type: string;
|
||||||
status: string;
|
status: string;
|
||||||
@@ -1505,12 +1521,31 @@ export default class FirecrawlApp {
    * @param params - Parameters for the deep research operation.
    * @returns The response containing the research job ID.
    */
-  async asyncDeepResearch(query: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
+  async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
+    let jsonData: any = { query, ...params };
+
+    if (jsonData?.jsonOptions?.schema) {
+      let schema = jsonData.jsonOptions.schema;
+      // Try parsing the schema as a Zod schema
+      try {
+        schema = zodToJsonSchema(schema);
+      } catch (error) {
+
+      }
+      jsonData = {
+        ...jsonData,
+        jsonOptions: {
+          ...jsonData.jsonOptions,
+          schema: schema,
+        },
+      };
+    }
+
     try {
       const response: AxiosResponse = await this.postRequest(
         `${this.apiUrl}/v1/deep-research`,
-        { query, ...params },
+        jsonData,
         headers
       );

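End to end, the updated SDK method can now be driven by a Zod schema; per the hunk above, asyncDeepResearch converts it with zodToJsonSchema before POSTing, so the API receives plain JSON Schema. A minimal usage sketch, assuming the published JS SDK package name and constructor; the query and schema are illustrative:

import FirecrawlApp from "@mendable/firecrawl-js";
import { z } from "zod";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" });

const started = await app.asyncDeepResearch("impact of rising rates on SaaS valuations", {
  formats: ["markdown", "json"],
  jsonOptions: {
    schema: z.object({
      thesis: z.string(),
      supportingPoints: z.array(z.string()),
    }),
  },
});
// `started` carries the research job id; poll the status endpoint for
// finalAnalysis (markdown) and json (schema-shaped) once completed.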
@@ -49,6 +49,7 @@ class DeepResearchParams(pydantic.BaseModel):
     timeLimit: Optional[int] = 270
     maxUrls: Optional[int] = 20
     analysisPrompt: Optional[str] = None
+    systemPrompt: Optional[str] = None
     __experimental_streamSteps: Optional[bool] = None

 class DeepResearchResponse(pydantic.BaseModel):
@@ -1171,7 +1172,6 @@ class FirecrawlApp:
             time.sleep(2)  # Polling interval

         return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
-
     def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> Dict[str, Any]:
         """
         Initiates an asynchronous deep research operation.
@@ -1195,8 +1195,15 @@ class FirecrawlApp:
             research_params = params

         headers = self._prepare_headers()

         json_data = {'query': query, **research_params.dict(exclude_none=True)}

+        # Handle json options schema if present
+        if 'jsonOptions' in json_data:
+            json_opts = json_data['jsonOptions']
+            if json_opts and 'schema' in json_opts and hasattr(json_opts['schema'], 'schema'):
+                json_data['jsonOptions']['schema'] = json_opts['schema'].schema()
+
         try:
             response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers)
             if response.status_code == 200: