added scrapeOptions to extract (#1133)

This commit is contained in:
Rafael Miller 2025-02-07 13:38:08 -03:00 committed by GitHub
parent 42f4f7ef09
commit ac5c88bffb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 0 deletions

View File

@ -306,4 +306,35 @@ describe("E2E Tests for Extract API Routes", () => {
},
60000,
);
// E2E check that POST /v1/extract returns the expected content when the
// request carries `scrapeOptions.waitFor` alongside a prompt and a schema.
// NOTE(review): the expected string suggests the test page fills in its
// content roughly 5 seconds after load, so the 6000 `waitFor` (presumably
// milliseconds — confirm against the scrape options schema) should outlast it.
it.concurrent(
  "should extract information with scrapeOptions.waitFor",
  async () => {
    // Request body: one URL, a prompt, the expected output shape, and the
    // scrape option under test.
    const payload = {
      urls: ["https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/"],
      prompt: "What is the content right after the #content-1 id?",
      schema: {
        type: "object",
        properties: {
          content: { type: "string" },
        },
        required: ["content"],
      },
      scrapeOptions: {
        waitFor: 6000,
      },
    };

    const { statusCode, body } = await request(TEST_URL)
      .post("/v1/extract")
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json")
      .send(payload);

    // The call must succeed and return an object under `data`.
    expect(statusCode).toBe(200);
    expect(body).toHaveProperty("data");
    expect(typeof body.data).toBe("object");

    // The extracted field must be present and match the expected text.
    const extracted = body.data?.content;
    expect(extracted).toBeDefined();
    expect(extracted).toBe("Content loaded after 5 seconds!");
  },
  // Per-test timeout passed to the test runner.
  60000,
);
});

View File

@ -226,6 +226,7 @@ export const extractV1Options = z
includeSubdomains: z.boolean().default(true),
allowExternalLinks: z.boolean().default(false),
enableWebSearch: z.boolean().default(false),
scrapeOptions: scrapeOptions.default({ onlyMainContent: false }).optional(),
origin: z.string().optional().default("api"),
urlTrace: z.boolean().default(false),
timeout: z.number().int().positive().finite().safe().default(60000),

View File

@ -294,6 +294,8 @@ export async function performExtraction(
isMultiEntity: true,
}),
{
...request.scrapeOptions,
// Needs to be true for multi-entity to work properly
onlyMainContent: true,
}
@ -554,6 +556,7 @@ export async function performExtraction(
url,
isMultiEntity: false,
}),
request.scrapeOptions
);
}
return docsMap.get(normalizeUrl(url));