mirror of
https://git.mirrors.martin98.com/https://github.com/mendableai/firecrawl
synced 2025-08-15 15:15:55 +08:00
fix
This commit is contained in:
parent
2ffde5abc1
commit
fea102dac6
@ -12,6 +12,7 @@ content-type: application/json
|
||||
"url":"https://firecrawl.dev"
|
||||
}
|
||||
|
||||
|
||||
### Crawl Website
|
||||
# @name crawl
|
||||
POST {{baseUrl}}/v1/crawl HTTP/1.1
|
||||
@ -120,3 +121,91 @@ content-type: application/json
|
||||
GET {{baseUrl}}/v1/llmstxt/{{generateLlmsTxtId}} HTTP/1.1
|
||||
Authorization: Bearer {{$dotenv TEST_API_KEY}}
|
||||
|
||||
### Scrape with JSON Schema Extraction
|
||||
# @name scrapeWithSchema
|
||||
POST {{baseUrl}}/v1/scrape HTTP/1.1
|
||||
Authorization: Bearer {{$dotenv TEST_API_KEY}}
|
||||
content-type: application/json
|
||||
|
||||
{
|
||||
"url": "https://firecrawl.dev",
|
||||
"formats": ["json"],
|
||||
"jsonOptions": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Describe the site"
|
||||
},
|
||||
"respect_robots_txt": {
|
||||
"type": ["boolean","null"],
|
||||
"description": "Does firecrawl respect the robots.txt files?"
|
||||
}
|
||||
},
|
||||
"required": ["description", "respect_robots_txt"]
|
||||
}
|
||||
// "systemPrompt": "You are an expert web scraper." // Optional system prompt
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
### Scrape with JSON Schema Extraction (description only)
|
||||
# @name scrapeWithSchemaDescriptionOnly
|
||||
POST {{baseUrl}}/v1/scrape HTTP/1.1
|
||||
Authorization: Bearer {{$dotenv TEST_API_KEY}}
|
||||
content-type: application/json
|
||||
|
||||
{
|
||||
"url": "https://firecrawl.dev",
|
||||
"formats": ["json"],
|
||||
"jsonOptions": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Describe the site"
|
||||
}
|
||||
|
||||
},
|
||||
"required": ["description" ]
|
||||
}
|
||||
// "systemPrompt": "You are an expert web scraper." // Optional system prompt
|
||||
}
|
||||
}
|
||||
|
||||
### Scrape to Extract Array of Titles
|
||||
# @name scrapeItemsArray
|
||||
POST {{baseUrl}}/v1/scrape HTTP/1.1
|
||||
Authorization: Bearer {{$dotenv TEST_API_KEY}}
|
||||
content-type: application/json
|
||||
|
||||
{
|
||||
"url": "https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/load-more/with-params",
|
||||
"formats": ["json"],
|
||||
"jsonOptions": {
|
||||
"prompt": "Extract all the main article or blog post titles from the page into an array.",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"description": "An array containing the extracted items.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "The title of a single article or blog post."
|
||||
}
|
||||
},
|
||||
"required": ["title"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["items"]
|
||||
}
|
||||
// "systemPrompt": "You are an expert structured data extractor."
|
||||
}
|
||||
}
|
@ -230,15 +230,18 @@ export async function extractData({
|
||||
}),
|
||||
);
|
||||
}
|
||||
console.log("smartscrapeResults", smartscrapeResults);
|
||||
|
||||
const scrapedPages = smartscrapeResults.map(
|
||||
(result) => result.scrapedPages,
|
||||
);
|
||||
const htmls = scrapedPages.map((page) => page.html);
|
||||
console.log("scrapedPages", scrapedPages);
|
||||
const htmls = scrapedPages.flat().map((page) => page.html);
|
||||
console.log("htmls", htmls);
|
||||
const markdowns = await Promise.all(
|
||||
htmls.map(async (html) => await parseMarkdown(html)),
|
||||
);
|
||||
|
||||
console.log("markdowns", markdowns);
|
||||
extractedData = await Promise.all(
|
||||
markdowns.map(async (markdown) => {
|
||||
const newExtractOptions = {
|
||||
|
Loading…
x
Reference in New Issue
Block a user