Added `scrapeOptions` support to extract (#1133)

This commit is contained in:
Rafael Miller 2025-02-07 13:38:08 -03:00 committed by GitHub
parent 42f4f7ef09
commit ac5c88bffb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 35 additions and 0 deletions

View File

@ -306,4 +306,35 @@ describe("E2E Tests for Extract API Routes", () => {
}, },
60000, 60000,
); );
// E2E case: posts an extract request that carries scrapeOptions.waitFor and
// checks the extracted `content` field against the expected page text.
// NOTE(review): the expected text mentions "5 seconds" while waitFor is 6000,
// so waitFor is presumably in milliseconds and meant to outlast the page's
// delayed render — confirm against the scraper's option docs.
it.concurrent(
  "should extract information with scrapeOptions.waitFor",
  async () => {
    // Request body, assembled up front so the HTTP call below stays compact.
    const extractPayload = {
      urls: ["https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/"],
      prompt: "What is the content right after the #content-1 id?",
      schema: {
        type: "object",
        properties: {
          content: { type: "string" },
        },
        required: ["content"],
      },
      scrapeOptions: {
        waitFor: 6000,
      },
    };

    const pendingRequest = request(TEST_URL)
      .post("/v1/extract")
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json");
    const response = await pendingRequest.send(extractPayload);

    // The request must succeed and return an object-shaped `data` payload.
    expect(response.statusCode).toBe(200);
    expect(response.body).toHaveProperty("data");
    const extracted = response.body.data;
    expect(typeof extracted).toBe("object");

    // Optional chaining keeps a null `data` from throwing before the
    // assertions can report the mismatch.
    expect(extracted?.content).toBeDefined();
    expect(extracted?.content).toBe("Content loaded after 5 seconds!");
  },
  // Third argument: per-test timeout (assumes Jest-style `it.concurrent`, in ms).
  60000,
);
}); });

View File

@ -226,6 +226,7 @@ export const extractV1Options = z
includeSubdomains: z.boolean().default(true), includeSubdomains: z.boolean().default(true),
allowExternalLinks: z.boolean().default(false), allowExternalLinks: z.boolean().default(false),
enableWebSearch: z.boolean().default(false), enableWebSearch: z.boolean().default(false),
scrapeOptions: scrapeOptions.default({ onlyMainContent: false }).optional(),
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
urlTrace: z.boolean().default(false), urlTrace: z.boolean().default(false),
timeout: z.number().int().positive().finite().safe().default(60000), timeout: z.number().int().positive().finite().safe().default(60000),

View File

@ -294,6 +294,8 @@ export async function performExtraction(
isMultiEntity: true, isMultiEntity: true,
}), }),
{ {
...request.scrapeOptions,
// Needs to be true for multi-entity to work properly // Needs to be true for multi-entity to work properly
onlyMainContent: true, onlyMainContent: true,
} }
@ -554,6 +556,7 @@ export async function performExtraction(
url, url,
isMultiEntity: false, isMultiEntity: false,
}), }),
request.scrapeOptions
); );
} }
return docsMap.get(normalizeUrl(url)); return docsMap.get(normalizeUrl(url));