From ac6650e48898719a9073983665f00c488c79a977 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 13 Jan 2025 22:31:54 -0300 Subject: [PATCH] Update requests.http --- apps/api/requests.http | 445 ++--------------------------------------- 1 file changed, 16 insertions(+), 429 deletions(-) diff --git a/apps/api/requests.http b/apps/api/requests.http index 6a1e9375..962ebabe 100644 --- a/apps/api/requests.http +++ b/apps/api/requests.http @@ -62,450 +62,37 @@ content-type: application/json "sitemapOnly": true } -### +Authorization: Bearer {{$dotenv TEST_API_KEY}} + + +### Extract Firecrawl Title +# @name extractFirecrawl POST {{baseUrl}}/v1/extract HTTP/1.1 Authorization: Bearer {{$dotenv TEST_API_KEY}} content-type: application/json { "urls": [ - "justcall.io/*" + "https://firecrawl.dev/blog" ], + "origin": "api-sdk", + "prompt": "Extract all the blog titles from the page, is multity entity = true", "schema": { "type": "object", "properties": { - "title": { "type": "string" }, - "description": { "type": "string" }, - "pricing": { - "type": "object", - "properties": { - "plan": { "type": "string" }, - "description": { "type": "string" }, - "price": { "type": "string" }, - "currency": { "type": "string" } - } - } - } - }, - "prompt": "Extract the title and description from the website", - "urlTrace": true -} - -### Extract -# @name extract -POST {{baseUrl}}/v1/extract HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} -content-type: application/json - -{ - "urls": [ - "https://benscreeknursery.com/*", - "https://www.bbb.org/us/nc/littleton/profile/landscape-contractors/bens-creek-nursery-0593-90077956", - "https://benscreeknursery.com/testimonial/", - "https://www.dnb.com/business-directory/company-profiles.bens_creek_nursery.af816431450a54ace44050c5dbc4cb30.html" - ], - "prompt": "Extract the following information from the website: business overview, management team, customer feedback, operational details, reputation, financial information, online presence, additional notes", - "schema": { - "type": "object", - "properties": { - "business_overview": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "location": { - "type": "object", - "properties": { - "address": { "type": "string" }, - "source": { "type": "string" } - } - }, - "years_in_operation": { - "type": "object", - "properties": { - "value": { "type": "string" }, - "source": { "type": "string" } - } - }, - "services_offered": { - "type": "object", - "properties": { - "list": { - "type": "array", - "items": { "type": "string" } - }, - "source": { "type": "string" } - } - }, - "business_structure": { - "type": "object", - "properties": { - "type": { "type": "string" }, - "source": { "type": "string" } - } - }, - "licensing": { - "type": "object", - "properties": { - "agency": { "type": "string" }, - "license_number": { "type": "string" }, - "source": { "type": "string" } - } - } - } - }, - "management_team": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "role": { "type": "string" }, - "source": { "type": "string" } - } - } - }, - "customer_feedback": { - "type": "object", - "properties": { - "birdeye_reviews": { - "type": "object", - "properties": { - "rating": { "type": "number" }, - "total_reviews": { "type": "integer" }, - "source": { "type": "string" } - } - }, - "testimonials": { - "type": "array", - "items": { - "type": "object", - "properties": { - "author": { "type": "string" }, - "content": { "type": "string" }, - "source": { "type": "string" } - } - } - } - } - }, - "operational_details": { - "type": "object", - "properties": { - "office_hours": { - "type": "object", - "properties": { - "monday_to_friday": { "type": "string" }, - "saturday": { "type": "string" }, - "sunday": { "type": "string" }, - "source": { "type": "string" } - } - }, - "contact_information": { - "type": "object", - "properties": { - "phone": { "type": "string" }, - "email": { "type": "string" }, - "source": { "type": "string" } - } - } - } - }, - "reputation": { - "type": "object", - "properties": { - "bbb_rating": { - "type": "object", - "properties": { - "value": { "type": "string" }, - "source": { "type": "string" } - } - }, - "customer_complaints": { - "type": "object", - "properties": { - "value": { "type": "string" }, - "source": { "type": "string" } - } - } - } - }, - "financial_information": { - "type": "object", - "properties": { - "dun_and_bradstreet_profile": { - "type": "object", - "properties": { - "status": { "type": "string" }, - "source": { "type": "string" } - } - }, - "financial_statements": { - "type": "object", - "properties": { - "status": { "type": "string" }, - "source": { "type": "string" } - } - } - } - }, - "online_presence": { - "type": "object", - "properties": { - "website": { - "type": "object", - "properties": { - "url": { "type": "string" }, - "source": { "type": "string" } - } - }, - "social_media": { - "type": "object", - "properties": { - "platform": { "type": "string" }, - "details": { "type": "string" }, - "source": { "type": "string" } - } - } - } - }, - "additional_notes": { - "type": "array", - "items": { - "type": "object", - "properties": { - "note": { "type": "string" }, - "source": { "type": "string" } - } - } - } - } - } -} - -### -### Batch Scrape Websites -# @name batchScrape -POST {{baseUrl}}/v1/batch/scrape HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} -content-type: application/json - -{ - "urls":[ - "https://mommypoppins.com/new-york-city-kids/event/events/gobble-gobble-give-at-the-apollo", - "https://mommypoppins.com/new-york-city-kids/event/events/sold-out-an-irish-halloweenoiche-shamhna-celtic-magic-at-the-irish", - "https://mommypoppins.com/new-york-city-kids/event/events/gingerbread-decorating-at-queens-county-farm-museum", - "https://mommypoppins.com/new-york-city-kids/event/events/the-yorkville-nutcracker-at-john-jay-college", - "https://mommypoppins.com/new-york-city-kids/event/events/day-of-the-dead-2024-at-the-museum-of-the-moving-image", - "https://mommypoppins.com/new-york-city-kids/event/events/watson-adventures-ghosts-of-greenwich-village-scavenger-hunt-for-1", - "https://mommypoppins.com/new-york-city-kids/event/events/luna-luna-forgotten-fantasy-at-the-shed", - "https://mommypoppins.com/new-york-city-kids/event/events/first-friday-art-music-dancing-at-the-bronx-museum", - - ], - "extract":{ - "prompt":"Use the schema for extracting from the main event in the page.\nDates should be extracted in YYYY-MM-DD format.\nExtracted times have to be separated by date of week when explicit in the event description, otherwise load all 7 days of the week. Times should be in the HH:MM format. Day sub-property is 0-6 for Sunday-Saturday\nIf event is free set price description to \"FREE\" and minmax both to 0. Otherwise description is the full text price cohort\nSome data could be better explained in the description of the event, extract as necessary.\n", - "schema":{ - "type":"object", - "$defs":{ - "MinMax":{ - "type":"object", - "title":"MinMax", - "required":["min","max"], - "properties":{ - "max":{ - "type":"number", - "title":"Max" - }, - "min":{ - "type":"number", - "title":"Min" - } - } - }, - "AgeRangeModel":{ - "type":"object", - "title":"AgeRangeModel", - "required":["range","description"], - "properties":{ - "range":{ - "$ref":"#/$defs/MinMax" - }, - "description":{ - "type":"string", - "title":"Description" - } - } - }, - "LocationModel":{ - "type":"object", - "title":"LocationModel", - "required":["name","address","phone"], - "properties":{ - "name":{ - "type":"string", - "title":"Name" - }, - "phone":{ - "type":"string", - "title":"Phone" - }, - "address":{ - "type":"string", - "title":"Address" - } - } - }, - "TimeSlotModel":{ - "type":"object", - "title":"TimeSlotModel", - "required":["day","start","end"], - "properties":{ - "day":{ - "type":"integer", - "title":"Day" - }, - "end":{ - "type":"string", - "title":"End" - }, - "start":{ - "type":"string", - "title":"Start" - } - } - }, - "DateRangeModel":{ - "type":"object", - "title":"DateRangeModel", - "required":["start","end"], - "properties":{ - "end":{ - "type":"string", - "title":"End" - }, - "start":{ - "type":"string", - "title":"Start" - } - } - }, - "PriceInfoModel":{ - "type":"object", - "title":"PriceInfoModel", - "required":["range","bracket"], - "properties":{ - "range":{ - "$ref":"#/$defs/MinMax" - }, - "bracket":{ - "type":"string", - "title":"Bracket" - } - } - } - }, - "title":"MommyPoppingsExtractionSchema", - "required":["name","date","time","description","website_url","age_range","price","location"], - "properties":{ - "date":{ - "$ref":"#/$defs/DateRangeModel" - }, - "name":{ - "type":"string", - "title":"Name" - }, - "time":{ - "type":"array", - "items":{ - "$ref":"#/$defs/TimeSlotModel" - }, - "title":"Time" - }, - "price":{ - "$ref":"#/$defs/PriceInfoModel" - }, - "location":{ - "$ref":"#/$defs/LocationModel" - }, - "age_range":{ - "$ref":"#/$defs/AgeRangeModel" - }, - "description":{ - "type":"string", - "title":"Description" - }, - "website_url":{ - "type":"string", - "title":"Website Url" - } - } - }, - "systemPrompt":"Based on the information on the page, extract all the information from the schema in JSON format. Try to extract all the fields even those that might not be marked as required." - }, - "formats":["extract"] -} - -### Check Batch Scrape Status -@batchScrapeId = {{batchScrape.response.body.$.id}} -# @name batchScrapeStatus -GET {{baseUrl}}/v1/crawl/4cb890c8-3fbe-4e02-94a4-15aee60446e8?skip=943 HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} - -### -# @name extract -POST {{baseUrl}}/v1/extract HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} -Content-Type: application/json - -{ - "urls": [ - "https://www.naamanp.co.il/catalog/product/view/id/16453/s/11314179/category/167/", - "https://www.naamanp.co.il/11290046", - "https://www.naamanp.co.il/11314193" - ], - "schema": { - "type": "object", - "properties": { - "title": { - "type": "string", - "title": "Title" - }, - "description": { - "type": "string", - "title": "Description" - }, - "categories": { + "blog_titles": { "type": "array", "items": { "type": "string" } - }, - "products": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "title": "Name" - }, - "description": { - "type": "string", - "title": "Description" - }, - "price": { - "type": "string", - "title": "Price" - } - } - } } - } - }, - "systemPrompt": "Extract all the products from the page. Consider it's a huge store with thousands of products and categories." + }, + "required": ["blog_titles"] + } } ### -@extractId = {{extract.response.body.$.id}} -# @name extractStatus -GET {{baseUrl}}/v1/extract/{{extractId}} HTTP/1.1 -Authorization: Bearer {{$dotenv TEST_API_KEY}} \ No newline at end of file +@extractFirecrawlId = {{extractFirecrawl.response.body.$.id}} +# @name extractFirecrawlStatus +GET {{baseUrl}}/v1/extract/{{extractFirecrawlId}} HTTP/1.1 +Authorization: Bearer {{$dotenv TEST_API_KEY}}