From 018c6a616e9b8e845df86e1d0810176f46372b0f Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 1 May 2025 01:26:09 +0000 Subject: [PATCH] Add Qwen3 web crawler example using OpenRouter Co-Authored-By: eric@sideguide.dev --- examples/qwen3-web-crawler/.env.example | 8 + examples/qwen3-web-crawler/README.md | 76 ++++++++++ .../qwen3-web-crawler/qwen3_web_crawler.py | 143 ++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 examples/qwen3-web-crawler/.env.example create mode 100644 examples/qwen3-web-crawler/README.md create mode 100644 examples/qwen3-web-crawler/qwen3_web_crawler.py diff --git a/examples/qwen3-web-crawler/.env.example b/examples/qwen3-web-crawler/.env.example new file mode 100644 index 00000000..9bac29b2 --- /dev/null +++ b/examples/qwen3-web-crawler/.env.example @@ -0,0 +1,8 @@ +# Firecrawl API key +FIRECRAWL_API_KEY=your_firecrawl_api_key_here + +# OpenAI API key (if using OpenAI for OpenRouter) +OPENAI_API_KEY=your_openai_api_key_here + +# OpenRouter API key (alternative to OpenAI API key) +OPENROUTER_API_KEY=your_openrouter_api_key_here diff --git a/examples/qwen3-web-crawler/README.md b/examples/qwen3-web-crawler/README.md new file mode 100644 index 00000000..d918908f --- /dev/null +++ b/examples/qwen3-web-crawler/README.md @@ -0,0 +1,76 @@ +# Qwen3 Web Crawler Example + +This example demonstrates how to use the Firecrawl API with the Qwen3 30B A3B model via OpenRouter to crawl websites and extract information based on specific objectives. 
+ +## Features + +- Maps a website to find relevant pages based on an objective +- Uses the Qwen3 30B A3B model from OpenRouter for intelligent search parameter generation +- Scrapes and analyzes top pages to extract relevant information +- Returns structured JSON data when the objective is met + +## Prerequisites + +- Python 3.7+ +- Firecrawl API key +- OpenRouter API key (or OpenAI API key configured for OpenRouter) + +## Setup + +1. Clone the repository +2. Install the required dependencies: + ``` + pip install firecrawl openai python-dotenv + ``` +3. Create a `.env` file based on the `.env.example` template and add your API keys: + ``` + FIRECRAWL_API_KEY=your_firecrawl_api_key_here + OPENAI_API_KEY=your_openai_api_key_here + OPENROUTER_API_KEY=your_openrouter_api_key_here + ``` + +## Usage + +Run the script: + +``` +python qwen3_web_crawler.py +``` + +You will be prompted to: +1. Enter the website URL to crawl +2. Enter your objective (what information you're looking for) + +The script will: +1. Analyze your objective to determine the optimal search parameter +2. Map the website to find relevant pages +3. Scrape and analyze the top pages +4. Extract and return the relevant information in JSON format if the objective is met + +## Example + +Input: +- Website: https://firecrawl.dev +- Objective: Find the pricing information for the API service + +Output: +```json +{ + "basic_plan": "$49/month", + "pro_plan": "$99/month", + "enterprise_plan": "Custom pricing", + "free_trial": "7 days" +} +``` + +## How It Works + +1. The script uses the Firecrawl API to map and scrape websites +2. It leverages the Qwen3 30B A3B model via OpenRouter to: + - Generate optimal search parameters based on the objective + - Analyze scraped content to determine if the objective is met + - Extract relevant information in a structured format + +## License + +This example is part of the Firecrawl project and is licensed under the same terms. 
import os
from firecrawl import FirecrawlApp
import json
from dotenv import load_dotenv
from openai import OpenAI


class Colors:
    """ANSI escape sequences used to color terminal output."""
    CYAN = '\033[96m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    MAGENTA = '\033[95m'
    BLUE = '\033[94m'
    RESET = '\033[0m'


load_dotenv()

firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")

app = FirecrawlApp(api_key=firecrawl_api_key)
# OpenRouter exposes an OpenAI-compatible API, so the stock OpenAI client
# works against it with a custom base_url; either key variable may hold the
# OpenRouter key (see .env.example).
client = OpenAI(
    api_key=openai_api_key or openrouter_api_key,
    base_url="https://openrouter.ai/api/v1",
)

# Qwen3 30B A3B served through OpenRouter's free tier; used for both the
# search-parameter and objective-check prompts.
MODEL = "qwen/qwen3-30b-a3b:free"


def _links_from_map_result(map_result):
    """Normalize the value returned by app.map_url to a list of URL strings.

    Depending on the Firecrawl SDK version, map_url returns either a plain
    list of links or a dict such as {'success': True, 'links': [...]} —
    TODO confirm against the installed SDK version.
    """
    if isinstance(map_result, list):
        return map_result
    if isinstance(map_result, dict):
        links = map_result.get("links", [])
        return links if isinstance(links, list) else []
    return []


def _strip_code_fences(text):
    """Strip surrounding markdown code fences (``` or ```json) from *text*.

    Models frequently wrap JSON answers in fences even when instructed not
    to; this makes the subsequent comparison and json.loads tolerant of it.
    """
    text = text.strip()
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline != -1:
            text = text[first_newline + 1:]
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def find_relevant_page_via_map(objective, url, app, client):
    """Map *url* with Firecrawl, guided by a model-chosen search keyword.

    Asks the LLM for a 1-2 word search parameter derived from *objective*,
    then calls app.map_url with it.

    Returns a list of candidate link URLs (possibly empty), or None on error.
    """
    try:
        print(f"{Colors.CYAN}Understood. The objective is: {objective}{Colors.RESET}")
        print(f"{Colors.CYAN}Initiating search on the website: {url}{Colors.RESET}")

        map_prompt = f"""
        The map function generates a list of URLs from a website and it accepts a search parameter. Based on the objective of: {objective}, come up with a 1-2 word search parameter that will help us find the information we need. Only respond with 1-2 words nothing else.
        """

        print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}")
        completion = client.chat.completions.create(
            model=MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": map_prompt
                        }
                    ]
                }
            ]
        )

        # Strip whitespace/newlines the model may add around the short answer;
        # a trailing newline here would otherwise pollute the search query.
        map_search_parameter = completion.choices[0].message.content.strip()
        print(f"{Colors.GREEN}Optimal search parameter identified: {map_search_parameter}{Colors.RESET}")

        print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}")
        map_website = app.map_url(url, params={"search": map_search_parameter})
        # Normalize to a plain list so len() and slicing downstream are sound
        # regardless of the SDK's return shape.
        links = _links_from_map_result(map_website)
        print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}")
        print(f"{Colors.GREEN}Located {len(links)} relevant links.{Colors.RESET}")
        return links
    except Exception as e:
        print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}")
        return None


def find_objective_in_top_pages(map_website, objective, app, client):
    """Scrape the top 3 mapped links and ask the LLM whether *objective* is met.

    Each link is handled independently: a scrape or model failure on one page
    logs the error and moves on to the next link instead of aborting the run
    (the original single try around the whole loop stopped at the first error).

    Returns the parsed JSON (dict) extracted by the model when the objective
    is met on some page, otherwise None.
    """
    top_links = _links_from_map_result(map_website)[:3]
    print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}")

    for link in top_links:
        # Per-link error isolation: one bad page must not skip the rest.
        try:
            print(f"{Colors.YELLOW}Initiating scrape of page: {link}{Colors.RESET}")
            scrape_result = app.scrape_url(link, params={'formats': ['markdown']})
            print(f"{Colors.GREEN}Page scraping completed successfully.{Colors.RESET}")

            check_prompt = f"""
            Given the following scraped content and objective, determine if the objective is met.
            If it is, extract the relevant information in a simple and concise JSON format. Use only the necessary fields and avoid nested structures if possible.
            If the objective is not met with confidence, respond with 'Objective not met'.

            Objective: {objective}
            Scraped content: {scrape_result['markdown']}

            Remember:
            1. Only return JSON if you are confident the objective is fully met.
            2. Keep the JSON structure as simple and flat as possible.
            3. Do not include any explanations or markdown formatting in your response.
            """

            completion = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": check_prompt
                            }
                        ]
                    }
                ]
            )

            # Strip whitespace and stray code fences so both the sentinel
            # comparison below and json.loads see clean text.
            result = _strip_code_fences(completion.choices[0].message.content)

            if result != "Objective not met":
                print(f"{Colors.GREEN}Objective potentially fulfilled. Relevant information identified.{Colors.RESET}")
                try:
                    return json.loads(result)
                except json.JSONDecodeError:
                    print(f"{Colors.RED}Error in parsing response. Proceeding to next page...{Colors.RESET}")
            else:
                print(f"{Colors.YELLOW}Objective not met on this page. Proceeding to next link...{Colors.RESET}")
        except Exception as e:
            print(f"{Colors.RED}Error encountered during page analysis: {str(e)}{Colors.RESET}")

    print(f"{Colors.RED}All available pages analyzed. Objective not fulfilled in examined content.{Colors.RESET}")
    return None


def main():
    """Interactive entry point: prompt for a site and objective, then crawl."""
    url = input(f"{Colors.BLUE}Enter the website to crawl: {Colors.RESET}")
    objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}")

    print(f"{Colors.YELLOW}Initiating web crawling process...{Colors.RESET}")
    map_website = find_relevant_page_via_map(objective, url, app, client)

    if map_website:
        print(f"{Colors.GREEN}Relevant pages identified. Proceeding with detailed analysis...{Colors.RESET}")
        result = find_objective_in_top_pages(map_website, objective, app, client)

        if result:
            print(f"{Colors.GREEN}Objective successfully fulfilled. Extracted information:{Colors.RESET}")
            print(f"{Colors.MAGENTA}{json.dumps(result, indent=2)}{Colors.RESET}")
        else:
            print(f"{Colors.RED}Unable to fulfill the objective with the available content.{Colors.RESET}")
    else:
        print(f"{Colors.RED}No relevant pages identified. Consider refining the search parameters or trying a different website.{Colors.RESET}")


if __name__ == "__main__":
    main()