# firecrawl/examples/groq_web_crawler/groq_website_analyzer.py

import os
from firecrawl import FirecrawlApp
from groq import Groq
from dotenv import load_dotenv

# ANSI color codes for pretty terminal output
class Colors:
    """Namespace of ANSI escape sequences used to colorize terminal output."""

    # Restores the terminal's default rendering; emitted after each colored span
    RESET = '\033[0m'

    # Bright foreground colors, in ascending escape-code order (91-96)
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'

# Run python-dotenv's loader before the API keys are read from the environment
load_dotenv()

# API credentials; each is None when its environment variable is unset
firecrawl_api_key = os.environ.get("FIRECRAWL_API_KEY")
groq_api_key = os.environ.get("GROQ_API_KEY")

# Module-level clients shared by the scraping and analysis functions
app = FirecrawlApp(api_key=firecrawl_api_key)
groq_client = Groq(api_key=groq_api_key)

def scrape_website(url):
    """
    Fetch a page through the module-level Firecrawl client, requesting markdown.

    Progress and failures are reported on stdout; exceptions are not propagated.

    Args:
        url (str): Address of the page to scrape

    Returns:
        The value returned by ``app.scrape_url`` (``main`` treats it as a
        mapping with a ``'markdown'`` key), or None if anything raised
    """
    try:
        print(f"{Colors.YELLOW}Scraping website: {url}{Colors.RESET}")
        request_params = {'formats': ['markdown']}
        page_data = app.scrape_url(url, params=request_params)
        print(f"{Colors.GREEN}Website scraped successfully.{Colors.RESET}")
        return page_data
    except Exception as exc:
        # Best-effort example script: report the failure and signal it with None
        print(f"{Colors.RED}Error scraping website: {str(exc)}{Colors.RESET}")
        return None

def summarize_content(content, model="deepseek-r1-distill-llama-70b"):
    """
    Ask a Groq chat model for a short summary of website content.

    Progress and failures are reported on stdout; exceptions are not propagated.

    Args:
        content (str): Website text to summarize; interpolated into the prompt
        model (str): Model identifier passed to the chat completion call

    Returns:
        The text of the first completion choice, or None if anything raised
    """
    try:
        print(f"{Colors.YELLOW}Generating summary using Groq's {model} model...{Colors.RESET}")

        user_prompt = f"""
        Please provide a concise summary of the following website content.
        The summary should:
        - Be around 3-5 paragraphs
        - Highlight the main purpose of the website
        - Include key features or offerings
        - Mention any unique selling points

        Content:
        {content}
        """

        chat_messages = [
            {"role": "system", "content": "You are a helpful assistant that specializes in creating concise website summaries."},
            {"role": "user", "content": user_prompt},
        ]
        response = groq_client.chat.completions.create(
            model=model,
            messages=chat_messages,
            temperature=0.5,
            max_tokens=1000,
        )

        summary_text = response.choices[0].message.content
        print(f"{Colors.GREEN}Summary generated successfully.{Colors.RESET}")
        return summary_text
    except Exception as exc:
        # Best-effort example script: report the failure and signal it with None
        print(f"{Colors.RED}Error generating summary: {str(exc)}{Colors.RESET}")
        return None

def _extract_json_object(text):
    """Return the first JSON object embedded in *text*, or None if there is none."""
    # Imported locally because this module does not import them at file level.
    import json
    import re

    if not text:
        return None

    # Reasoning models may prepend a <think>...</think> block; drop it so any
    # braces in the reasoning are not mistaken for the answer.
    answer = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # Try to decode a complete JSON value starting at each '{'. Unlike a greedy
    # "first brace to last brace" match, this tolerates prose or code fences
    # around the object.
    decoder = json.JSONDecoder()
    start = answer.find('{')
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(answer, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = answer.find('{', start + 1)
    return None


def analyze_website_sentiment(content, model="deepseek-r1-distill-llama-70b"):
    """
    Analyze the sentiment and tone of the website content using Groq's API.

    The model is asked to answer with a JSON object; the first decodable JSON
    object found in its reply is returned.

    Args:
        content (str): The content to analyze
        model (str): The model to use for analysis

    Returns:
        dict | None: The parsed analysis (requested fields: sentiment,
        tone_descriptors, formality_level, target_audience). If the reply
        contains no decodable JSON object, a dict with "error" and
        "raw_response" keys. None if the API call itself failed.
    """
    try:
        print(f"{Colors.YELLOW}Analyzing website sentiment using Groq's {model} model...{Colors.RESET}")

        prompt = f"""
        Please analyze the sentiment and tone of the following website content.
        Return your analysis as a JSON object with the following fields:
        - sentiment: the overall sentiment (positive, neutral, negative)
        - tone_descriptors: an array of 3-5 adjectives describing the tone
        - formality_level: an estimate of how formal the language is (1-10 scale)
        - target_audience: your estimate of who the content is aimed at

        Content:
        {content}
        """

        completion = groq_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that specializes in content and sentiment analysis."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2,
            max_tokens=800
        )

        analysis_text = completion.choices[0].message.content
        print(f"{Colors.GREEN}Sentiment analysis completed.{Colors.RESET}")
    except Exception as e:
        # Best-effort example script: report the failure and signal it with None
        print(f"{Colors.RED}Error analyzing sentiment: {str(e)}{Colors.RESET}")
        return None

    analysis = _extract_json_object(analysis_text)
    if analysis is None:
        print(f"{Colors.RED}Error parsing JSON response: no valid JSON object found{Colors.RESET}")
        # Always hand back the raw reply so the caller can still inspect it
        return {"error": "Could not parse JSON from response", "raw_response": analysis_text}
    return analysis

def extract_key_topics(content, model="deepseek-r1-distill-llama-70b"):
    """
    Ask a Groq chat model to list the main topics of website content.

    Progress and failures are reported on stdout; exceptions are not propagated.

    Args:
        content (str): Website text to analyze; interpolated into the prompt
        model (str): Model identifier passed to the chat completion call

    Returns:
        The text of the first completion choice (the prompt requests a numbered
        "[Topic name]: [Brief description]" list; it is returned unparsed),
        or None if anything raised
    """
    try:
        print(f"{Colors.YELLOW}Extracting key topics using Groq's {model} model...{Colors.RESET}")

        user_prompt = f"""
        Extract the 5-8 most important topics or concepts from the following website content.
        For each topic, provide:
        1. A short name (1-3 words)
        2. A brief description (10-15 words)

        Return your response as a simple list in the following format:
        1. [Topic name]: [Brief description]
        2. [Topic name]: [Brief description]

        Content:
        {content}
        """

        chat_messages = [
            {"role": "system", "content": "You are a helpful assistant that specializes in extracting key topics from content."},
            {"role": "user", "content": user_prompt},
        ]
        response = groq_client.chat.completions.create(
            model=model,
            messages=chat_messages,
            temperature=0.3,
            max_tokens=800,
        )

        topic_listing = response.choices[0].message.content
        print(f"{Colors.GREEN}Key topics extracted successfully.{Colors.RESET}")
        return topic_listing
    except Exception as exc:
        # Best-effort example script: report the failure and signal it with None
        print(f"{Colors.RED}Error extracting key topics: {str(exc)}{Colors.RESET}")
        return None

def _normalize_url(raw_url):
    """Return *raw_url* stripped, with ``https://`` prepended if it has no HTTP(S) scheme."""
    cleaned = raw_url.strip()
    # Compare against the full scheme prefixes: a bare startswith('http') would
    # wrongly treat hosts such as "httpbin.org" as already having a scheme.
    if not cleaned.lower().startswith(('http://', 'https://')):
        cleaned = 'https://' + cleaned
    return cleaned


def main():
    """
    Run the interactive website analysis.

    Prompts for a URL, scrapes it, asks which analysis to run (summary,
    sentiment, key topics, or all three) and prints each result that the
    corresponding function returns. Exits early with a message when no URL is
    entered, the scrape yields no markdown, or the menu choice is not 1-4.
    """
    # Get user input
    url = input(f"{Colors.BLUE}Enter the website URL to analyze: {Colors.RESET}")

    if not url.strip():
        print(f"{Colors.RED}No URL entered. Exiting.{Colors.RESET}")
        return

    # Trim whitespace and default to https:// when no scheme was given
    url = _normalize_url(url)

    # Scrape the website
    scrape_result = scrape_website(url)

    if not scrape_result or 'markdown' not in scrape_result:
        print(f"{Colors.RED}Failed to scrape website. Exiting.{Colors.RESET}")
        return

    content = scrape_result['markdown']

    # Menu key -> (heading printed above the result, function producing it)
    analyses = (
        ('1', "Website Summary", summarize_content),
        ('2', "Sentiment Analysis", analyze_website_sentiment),
        ('3', "Key Topics", extract_key_topics),
    )
    run_all_key = '4'

    # Ask user which analysis to perform
    print(f"\n{Colors.BLUE}Select an analysis option:{Colors.RESET}")
    print("1. Generate a concise summary of the website")
    print("2. Analyze the sentiment and tone of the website")
    print("3. Extract key topics from the website")
    print("4. Perform all analyses")

    option = input(f"{Colors.BLUE}Enter your choice (1-4): {Colors.RESET}").strip()

    # Reject anything outside the menu instead of silently running nothing
    valid_options = {key for key, _, _ in analyses} | {run_all_key}
    if option not in valid_options:
        print(f"{Colors.RED}Invalid choice. Please enter a number from 1 to 4.{Colors.RESET}")
        return

    # Perform the selected analysis (or every analysis for option 4)
    for key, heading, analyze in analyses:
        if option not in (key, run_all_key):
            continue
        result = analyze(content)
        if result:
            print(f"\n{Colors.CYAN}{heading}:{Colors.RESET}")
            print(f"{Colors.MAGENTA}{result}{Colors.RESET}")
            print("\n")

    print(f"{Colors.GREEN}Analysis complete!{Colors.RESET}")

# Start the interactive analyzer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()