mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-02 05:30:38 +08:00
added scrapeOptions to extract (#1133)
This commit is contained in:
parent
42f4f7ef09
commit
ac5c88bffb
@ -306,4 +306,35 @@ describe("E2E Tests for Extract API Routes", () => {
|
|||||||
},
|
},
|
||||||
60000,
|
60000,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
it.concurrent(
  "should extract information with scrapeOptions.waitFor",
  async () => {
    // Body sent to /v1/extract: one URL, a prompt, a JSON schema that
    // requires a string `content` field, and scrapeOptions with waitFor.
    const extractPayload = {
      urls: ["https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/"],
      prompt: "What is the content right after the #content-1 id?",
      schema: {
        type: "object",
        properties: {
          content: { type: "string" },
        },
        required: ["content"],
      },
      scrapeOptions: {
        // NOTE(review): presumably milliseconds, chosen to outlast the
        // delayed content the assertion below expects ("after 5 seconds")
        // — confirm against the scrapeOptions schema.
        waitFor: 6000,
      },
    };

    const res = await request(TEST_URL)
      .post("/v1/extract")
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json")
      .send(extractPayload);

    // The call must succeed and return an object under `data`.
    expect(res.statusCode).toBe(200);
    expect(res.body).toHaveProperty("data");
    expect(typeof res.body.data).toBe("object");

    // The extracted `content` must be present and match the expected text.
    expect(res.body.data?.content).toBeDefined();
    expect(res.body.data?.content).toBe("Content loaded after 5 seconds!");
  },
  60000,
);
|
||||||
});
|
});
|
||||||
|
@ -226,6 +226,7 @@ export const extractV1Options = z
|
|||||||
includeSubdomains: z.boolean().default(true),
|
includeSubdomains: z.boolean().default(true),
|
||||||
allowExternalLinks: z.boolean().default(false),
|
allowExternalLinks: z.boolean().default(false),
|
||||||
enableWebSearch: z.boolean().default(false),
|
enableWebSearch: z.boolean().default(false),
|
||||||
|
scrapeOptions: scrapeOptions.default({ onlyMainContent: false }).optional(),
|
||||||
origin: z.string().optional().default("api"),
|
origin: z.string().optional().default("api"),
|
||||||
urlTrace: z.boolean().default(false),
|
urlTrace: z.boolean().default(false),
|
||||||
timeout: z.number().int().positive().finite().safe().default(60000),
|
timeout: z.number().int().positive().finite().safe().default(60000),
|
||||||
|
@ -294,6 +294,8 @@ export async function performExtraction(
|
|||||||
isMultiEntity: true,
|
isMultiEntity: true,
|
||||||
}),
|
}),
|
||||||
{
|
{
|
||||||
|
...request.scrapeOptions,
|
||||||
|
|
||||||
// Needs to be true for multi-entity to work properly
|
// Needs to be true for multi-entity to work properly
|
||||||
onlyMainContent: true,
|
onlyMainContent: true,
|
||||||
}
|
}
|
||||||
@ -554,6 +556,7 @@ export async function performExtraction(
|
|||||||
url,
|
url,
|
||||||
isMultiEntity: false,
|
isMultiEntity: false,
|
||||||
}),
|
}),
|
||||||
|
request.scrapeOptions
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return docsMap.get(normalizeUrl(url));
|
return docsMap.get(normalizeUrl(url));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user