added scrapeOptions to extract (#1133)
parent 42f4f7ef09
commit ac5c88bffb
@@ -306,4 +306,35 @@ describe("E2E Tests for Extract API Routes", () => {
     },
     60000,
   );
+
+  it.concurrent(
+    "should extract information with scrapeOptions.waitFor",
+    async () => {
+      const response = await request(TEST_URL)
+        .post("/v1/extract")
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .send({
+          urls: ["https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/"],
+          prompt: "What is the content right after the #content-1 id?",
+          schema: {
+            type: "object",
+            properties: {
+              content: { type: "string" },
+            },
+            required: ["content"],
+          },
+          scrapeOptions: {
+            waitFor: 6000,
+          }
+        });
+
+      expect(response.statusCode).toBe(200);
+      expect(response.body).toHaveProperty("data");
+      expect(typeof response.body.data).toBe("object");
+      expect(response.body.data?.content).toBeDefined();
+      expect(response.body.data?.content).toBe("Content loaded after 5 seconds!");
+    },
+    60000,
+  );
 });
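For reference, the request exercised by the new test looks like this when sent with plain fetch instead of supertest. A minimal sketch: FIRECRAWL_BASE_URL and FIRECRAWL_API_KEY are placeholder names, not values defined by this commit, and the test itself runs against TEST_URL with TEST_API_KEY.

// Minimal sketch of the request the new E2E test sends.
const baseUrl = process.env.FIRECRAWL_BASE_URL ?? "https://api.firecrawl.dev";

const response = await fetch(`${baseUrl}/v1/extract`, {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    urls: ["https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/"],
    prompt: "What is the content right after the #content-1 id?",
    schema: {
      type: "object",
      properties: { content: { type: "string" } },
      required: ["content"],
    },
    // New in this commit: per-request scrape options forwarded to the scraper.
    scrapeOptions: { waitFor: 6000 },
  }),
});

const body = await response.json();
console.log(body.data?.content); // the test expects "Content loaded after 5 seconds!"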
@@ -226,6 +226,7 @@ export const extractV1Options = z
     includeSubdomains: z.boolean().default(true),
     allowExternalLinks: z.boolean().default(false),
     enableWebSearch: z.boolean().default(false),
+    scrapeOptions: scrapeOptions.default({ onlyMainContent: false }).optional(),
     origin: z.string().optional().default("api"),
     urlTrace: z.boolean().default(false),
     timeout: z.number().int().positive().finite().safe().default(60000),
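The new field reuses the shared scrapeOptions schema, wrapped in .default(...) and .optional(). A standalone sketch of how that zod pattern behaves, using a toy stand-in schema rather than the real scrapeOptions and assuming zod v3 semantics:

import { z } from "zod";

// Toy stand-in for the shared scrapeOptions schema; the real one has many more fields.
const toyScrapeOptions = z.object({
  onlyMainContent: z.boolean().default(false),
  waitFor: z.number().int().nonnegative().default(0),
});

const toyExtractOptions = z.object({
  scrapeOptions: toyScrapeOptions.default({ onlyMainContent: false }).optional(),
});

// Supplying scrapeOptions fills in per-field defaults:
const withOptions = toyExtractOptions.parse({ scrapeOptions: { waitFor: 6000 } });
console.log(withOptions.scrapeOptions); // { onlyMainContent: false, waitFor: 6000 }

// Omitting it leaves the field undefined: optional() wraps the default, so in
// zod v3 a missing key short-circuits before the object-level default applies.
const withoutOptions = toyExtractOptions.parse({});
console.log(withoutOptions.scrapeOptions); // undefined

Spreading an undefined scrapeOptions downstream (as the multi-entity branch below does with ...request.scrapeOptions) is still safe, since { ...undefined } evaluates to {}.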
@@ -294,6 +294,8 @@ export async function performExtraction(
           isMultiEntity: true,
         }),
         {
+          ...request.scrapeOptions,
+
           // Needs to be true for multi-entity to work properly
           onlyMainContent: true,
         }
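In the multi-entity branch above, the caller's options are spread first and onlyMainContent is set afterwards, so the forced value always wins. A small standalone illustration of that spread ordering, with toy values rather than Firecrawl code:

// Later properties win in an object spread, so forcing onlyMainContent after
// spreading the user's options guarantees the multi-entity requirement holds
// even if the caller sent onlyMainContent: false.
const userScrapeOptions = { waitFor: 6000, onlyMainContent: false };

const multiEntityScrapeOptions = {
  ...userScrapeOptions,
  onlyMainContent: true,
};

console.log(multiEntityScrapeOptions); // { waitFor: 6000, onlyMainContent: true }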
@@ -554,6 +556,7 @@ export async function performExtraction(
           url,
           isMultiEntity: false,
         }),
+        request.scrapeOptions
       );
     }
     return docsMap.get(normalizeUrl(url));