This commit is contained in:
Thomas Kosmas 2025-04-03 23:23:32 +03:00
parent 2ffde5abc1
commit fea102dac6
2 changed files with 94 additions and 2 deletions

View File

@ -12,6 +12,7 @@ content-type: application/json
"url":"https://firecrawl.dev"
}
### Crawl Website
# @name crawl
POST {{baseUrl}}/v1/crawl HTTP/1.1
@ -120,3 +121,91 @@ content-type: application/json
GET {{baseUrl}}/v1/llmstxt/{{generateLlmsTxtId}} HTTP/1.1
Authorization: Bearer {{$dotenv TEST_API_KEY}}
### Scrape with JSON Schema Extraction
# @name scrapeWithSchema
POST {{baseUrl}}/v1/scrape HTTP/1.1
Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
"url": "https://firecrawl.dev",
"formats": ["json"],
"jsonOptions": {
"schema": {
"type": "object",
"properties": {
"description": {
"type": "string",
"description": "Describe the site"
},
"respect_robots_txt": {
"type": ["boolean","null"],
"description": "Does firecrawl respect the robots.txt files?"
}
},
"required": ["description", "respect_robots_txt"]
}
// "systemPrompt": "You are an expert web scraper." // Optional system prompt
}
}
### Scrape with JSON Schema Extraction (description only)
# @name scrapeWithSchemaSimple
POST {{baseUrl}}/v1/scrape HTTP/1.1
Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
"url": "https://firecrawl.dev",
"formats": ["json"],
"jsonOptions": {
"schema": {
"type": "object",
"properties": {
"description": {
"type": "string",
"description": "Describe the site"
}
},
"required": ["description" ]
}
// "systemPrompt": "You are an expert web scraper." // Optional system prompt
}
}
### Scrape to Extract Array of Titles
# @name scrapeItemsArray
POST {{baseUrl}}/v1/scrape HTTP/1.1
Authorization: Bearer {{$dotenv TEST_API_KEY}}
content-type: application/json
{
"url": "https://firecrawl-e2e-test-git-main-rafaelsideguides-projects.vercel.app/load-more/with-params",
"formats": ["json"],
"jsonOptions": {
"prompt": "Extract all the main article or blog post titles from the page into an array.",
"schema": {
"type": "object",
"properties": {
"items": {
"type": "array",
"description": "An array containing the extracted items.",
"items": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "The title of a single article or blog post."
}
},
"required": ["title"]
}
}
},
"required": ["items"]
}
// "systemPrompt": "You are an expert structured data extractor."
}
}

View File

@ -230,15 +230,18 @@ export async function extractData({
}),
);
}
console.log("smartscrapeResults", smartscrapeResults);
const scrapedPages = smartscrapeResults.map(
(result) => result.scrapedPages,
);
const htmls = scrapedPages.map((page) => page.html);
console.log("scrapedPages", scrapedPages);
const htmls = scrapedPages.flat().map((page) => page.html);
console.log("htmls", htmls);
const markdowns = await Promise.all(
htmls.map(async (html) => await parseMarkdown(html)),
);
console.log("markdowns", markdowns);
extractedData = await Promise.all(
markdowns.map(async (markdown) => {
const newExtractOptions = {