Add groq_web_crawler example and dependencies

2025-08-13 19:15:54 +08:00 · 2025-02-26 22:24:38 +08:00 · 2025-02-26 22:24:38 +08:00 · 75ac980fe4
commit 75ac980fe4
parent bf1a79588e
2 changed files with 251 additions and 0 deletions
--- a/examples/groq_web_crawler/groq_website_analyzer.py
+++ b/examples/groq_web_crawler/groq_website_analyzer.py
@ -0,0 +1,248 @@
+import os
+from firecrawl import FirecrawlApp
+from groq import Groq
+from dotenv import load_dotenv
+
+# ANSI escape sequences used to colorize terminal output
+class Colors:
+    """Named ANSI escape codes; wrap text as f"{Colors.RED}...{Colors.RESET}"."""
+
+    # Foreground colors, listed in ascending code order (91-96)
+    RED = "\033[91m"
+    GREEN = "\033[92m"
+    YELLOW = "\033[93m"
+    BLUE = "\033[94m"
+    MAGENTA = "\033[95m"
+    CYAN = "\033[96m"
+
+    # Emitted after colored text to return the terminal to its default style
+    RESET = "\033[0m"
+
+# Populate the process environment from a .env file, when one is present
+load_dotenv()
+
+# Read both service credentials; each is None when its variable is unset
+firecrawl_api_key = os.environ.get("FIRECRAWL_API_KEY")
+groq_api_key = os.environ.get("GROQ_API_KEY")
+
+# Module-level clients used by the functions in this script
+app = FirecrawlApp(api_key=firecrawl_api_key)
+groq_client = Groq(api_key=groq_api_key)
+
+def scrape_website(url):
+    """
+    Fetch a page through Firecrawl, requesting its markdown representation.
+
+    Args:
+        url (str): Address of the page to scrape
+
+    Returns:
+        Whatever ``app.scrape_url`` returns on success, or None when any
+        exception is raised (the error is printed, not propagated)
+    """
+    request_params = {'formats': ['markdown']}
+    try:
+        print(f"{Colors.YELLOW}Scraping website: {url}{Colors.RESET}")
+        page_data = app.scrape_url(url, params=request_params)
+        print(f"{Colors.GREEN}Website scraped successfully.{Colors.RESET}")
+    except Exception as exc:
+        # Best-effort example script: report the failure and let the caller decide
+        print(f"{Colors.RED}Error scraping website: {exc!s}{Colors.RESET}")
+        return None
+    return page_data
+
+def summarize_content(content, model="deepseek-r1-distill-llama-70b"):
+    """
+    Ask a Groq chat model for a short summary of scraped website text.
+
+    Args:
+        content (str): Website text that is embedded verbatim in the prompt
+        model (str): Identifier of the Groq model to query
+
+    Returns:
+        str | None: Message content of the first completion choice, or None
+        when any exception occurs (the error is printed, not raised)
+    """
+    try:
+        print(f"{Colors.YELLOW}Generating summary using Groq's {model} model...{Colors.RESET}")
+
+        # The literal's leading whitespace is part of the text sent to the model
+        user_prompt = f"""
+        Please provide a concise summary of the following website content.
+        The summary should:
+        - Be around 3-5 paragraphs
+        - Highlight the main purpose of the website
+        - Include key features or offerings
+        - Mention any unique selling points
+
+        Content:
+        {content}
+        """
+
+        chat_messages = [
+            {"role": "system", "content": "You are a helpful assistant that specializes in creating concise website summaries."},
+            {"role": "user", "content": user_prompt},
+        ]
+        response = groq_client.chat.completions.create(
+            model=model,
+            messages=chat_messages,
+            temperature=0.5,
+            max_tokens=1000,
+        )
+
+        # Only the first choice is used
+        summary_text = response.choices[0].message.content
+        print(f"{Colors.GREEN}Summary generated successfully.{Colors.RESET}")
+        return summary_text
+    except Exception as exc:
+        print(f"{Colors.RED}Error generating summary: {exc!s}{Colors.RESET}")
+        return None
+
+def _extract_json_object(text):
+    """Return the first JSON object embedded in *text*, or None if there is none."""
+    import json
+    import re
+
+    # Reasoning models may emit a <think>...</think> preamble; drop it so that
+    # braces inside the reasoning are never mistaken for the answer.
+    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
+
+    # Try every opening brace in turn. raw_decode parses exactly one JSON value
+    # and ignores trailing text, so code fences or commentary around the object
+    # (or a second object later on) no longer break parsing the way a greedy
+    # first-'{'-to-last-'}' match does.
+    decoder = json.JSONDecoder()
+    for brace in re.finditer(r'\{', cleaned):
+        try:
+            parsed, _end = decoder.raw_decode(cleaned, brace.start())
+        except json.JSONDecodeError:
+            continue  # not valid JSON from this brace; try the next one
+        return parsed
+    return None
+
+def analyze_website_sentiment(content, model="deepseek-r1-distill-llama-70b"):
+    """
+    Analyze the sentiment and tone of the website content using Groq's API.
+
+    Args:
+        content (str): The content to analyze
+        model (str): The model to use for analysis
+
+    Returns:
+        dict | None: The JSON object parsed from the model's reply. When no
+        JSON object can be extracted, a dict with an "error" message and the
+        unparsed "raw_response". None when the API call itself fails (the
+        error is printed, not raised).
+    """
+    try:
+        print(f"{Colors.YELLOW}Analyzing website sentiment using Groq's {model} model...{Colors.RESET}")
+
+        prompt = f"""
+        Please analyze the sentiment and tone of the following website content.
+        Return your analysis as a JSON object with the following fields:
+        - sentiment: the overall sentiment (positive, neutral, negative)
+        - tone_descriptors: an array of 3-5 adjectives describing the tone
+        - formality_level: an estimate of how formal the language is (1-10 scale)
+        - target_audience: your estimate of who the content is aimed at
+
+        Content:
+        {content}
+        """
+
+        completion = groq_client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that specializes in content and sentiment analysis."},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.2,
+            max_tokens=800
+        )
+
+        analysis_text = completion.choices[0].message.content
+        print(f"{Colors.GREEN}Sentiment analysis completed.{Colors.RESET}")
+    except Exception as e:
+        print(f"{Colors.RED}Error analyzing sentiment: {str(e)}{Colors.RESET}")
+        return None
+
+    # Extract the JSON from the response
+    try:
+        analysis = _extract_json_object(analysis_text)
+    except Exception as json_err:
+        # e.g. the reply carried no text at all
+        print(f"{Colors.RED}Error parsing JSON response: {str(json_err)}{Colors.RESET}")
+        return {"error": "Could not parse JSON", "raw_response": analysis_text}
+
+    if analysis is None:
+        # Include the raw reply so both error shapes give the caller the same context
+        return {"error": "Could not parse JSON from response", "raw_response": analysis_text}
+    return analysis
+
+def extract_key_topics(content, model="deepseek-r1-distill-llama-70b"):
+    """
+    Ask a Groq chat model to list the main topics found in website text.
+
+    Args:
+        content (str): Website text that is embedded verbatim in the prompt
+        model (str): Identifier of the Groq model to query
+
+    Returns:
+        str | None: The model's reply as unparsed text (the prompt requests a
+        numbered "name: description" list), or None when any exception occurs
+        (the error is printed, not raised)
+    """
+    try:
+        print(f"{Colors.YELLOW}Extracting key topics using Groq's {model} model...{Colors.RESET}")
+
+        # The literal's leading whitespace is part of the text sent to the model
+        user_prompt = f"""
+        Extract the 5-8 most important topics or concepts from the following website content.
+        For each topic, provide:
+        1. A short name (1-3 words)
+        2. A brief description (10-15 words)
+
+        Return your response as a simple list in the following format:
+        1. [Topic name]: [Brief description]
+        2. [Topic name]: [Brief description]
+
+        Content:
+        {content}
+        """
+
+        chat_messages = [
+            {"role": "system", "content": "You are a helpful assistant that specializes in extracting key topics from content."},
+            {"role": "user", "content": user_prompt},
+        ]
+        response = groq_client.chat.completions.create(
+            model=model,
+            messages=chat_messages,
+            temperature=0.3,
+            max_tokens=800,
+        )
+
+        # The reply is returned as-is; no list parsing is attempted here
+        reply_text = response.choices[0].message.content
+        print(f"{Colors.GREEN}Key topics extracted successfully.{Colors.RESET}")
+        return reply_text
+    except Exception as exc:
+        print(f"{Colors.RED}Error extracting key topics: {exc!s}{Colors.RESET}")
+        return None
+
+def main():
+    """
+    Main function to run the website analysis.
+
+    Prompts for a URL, scrapes it, then asks which analysis to run
+    (summary, sentiment, key topics, or all three) and prints the results.
+    Returns early, after printing a message, when the URL is empty, the
+    scrape yields no markdown, or the menu choice is not 1-4.
+    """
+    # Get user input; strip it so stray whitespace never ends up inside the URL
+    url = input(f"{Colors.BLUE}Enter the website URL to analyze: {Colors.RESET}").strip()
+
+    if not url:
+        print(f"{Colors.RED}No URL entered. Exiting.{Colors.RESET}")
+        return
+
+    # Add https:// prefix if no scheme is present. Compare against the full
+    # scheme prefixes: a bare 'http' check would wrongly treat hosts such as
+    # "httpbin.org" as already having a scheme.
+    if not url.lower().startswith(('http://', 'https://')):
+        url = 'https://' + url
+
+    # Scrape the website
+    scrape_result = scrape_website(url)
+
+    if not scrape_result or 'markdown' not in scrape_result:
+        print(f"{Colors.RED}Failed to scrape website. Exiting.{Colors.RESET}")
+        return
+
+    content = scrape_result['markdown']
+
+    # Ask user which analysis to perform
+    print(f"\n{Colors.BLUE}Select an analysis option:{Colors.RESET}")
+    print("1. Generate a concise summary of the website")
+    print("2. Analyze the sentiment and tone of the website")
+    print("3. Extract key topics from the website")
+    print("4. Perform all analyses")
+
+    option = input(f"{Colors.BLUE}Enter your choice (1-4): {Colors.RESET}").strip()
+
+    # Reject unknown choices instead of silently reporting success
+    if option not in ('1', '2', '3', '4'):
+        print(f"{Colors.RED}Invalid choice. Please enter a number from 1 to 4.{Colors.RESET}")
+        return
+
+    run_all = option == '4'
+
+    # Perform the selected analysis
+    if run_all or option == '1':
+        summary = summarize_content(content)
+        if summary:
+            print(f"\n{Colors.CYAN}Website Summary:{Colors.RESET}")
+            print(f"{Colors.MAGENTA}{summary}{Colors.RESET}")
+            print("\n")
+
+    if run_all or option == '2':
+        sentiment = analyze_website_sentiment(content)
+        if sentiment:
+            print(f"\n{Colors.CYAN}Sentiment Analysis:{Colors.RESET}")
+            print(f"{Colors.MAGENTA}{sentiment}{Colors.RESET}")
+            print("\n")
+
+    if run_all or option == '3':
+        topics = extract_key_topics(content)
+        if topics:
+            print(f"\n{Colors.CYAN}Key Topics:{Colors.RESET}")
+            print(f"{Colors.MAGENTA}{topics}{Colors.RESET}")
+            print("\n")
+
+    print(f"{Colors.GREEN}Analysis complete!{Colors.RESET}")
+
+# Run the interactive analyzer only when executed as a script, not on import
+if "__main__" == __name__:
+    main()
--- a/examples/groq_web_crawler/requirements.txt
+++ b/examples/groq_web_crawler/requirements.txt
@ -0,0 +1,3 @@
+firecrawl-py
+groq
+python-dotenv