"""Analyze a GitHub profile using Firecrawl extraction and Gemini.

The script asks for a GitHub username, submits the profile URL to the
Firecrawl ``/v1/extract`` endpoint, polls until the extraction finishes,
asks Gemini to summarize the extracted data, and prints a colorized report.

Environment variables (loaded through ``python-dotenv``):
    GOOGLE_API_KEY: passed to ``genai.Client``.
    FIRECRAWL_API_KEY: bearer token for the Firecrawl API.
"""

import json
import os
import re
import time
from datetime import datetime
from typing import Any, Optional
from urllib.parse import quote

import requests
from dotenv import load_dotenv
from google import genai


# ANSI color codes
class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    CYAN = '\033[96m'
    YELLOW = '\033[93m'
    GREEN = '\033[92m'
    RED = '\033[91m'
    MAGENTA = '\033[95m'
    BLUE = '\033[94m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    RESET = '\033[0m'  # restores the terminal's default attributes


# Emojis for different sections
class Emojis:
    """Emoji glyphs used as visual markers in console output."""

    GITHUB = "🐙"
    STATS = "📊"
    CALENDAR = "📅"
    SKILLS = "💻"
    STAR = "⭐"
    ROCKET = "🚀"
    CHART = "📈"
    BULB = "💡"
    WARNING = "⚠️"
    CHECK = "✅"
    FIRE = "🔥"
    BOOK = "📚"
    TOOLS = "🛠️"
    GRAPH = "📊"
    TARGET = "🎯"


# Matches ANSI SGR sequences such as the ones defined in ``Colors``.
_ANSI_ESCAPE_RE = re.compile(r'\x1b\[[0-9;]*m')

# Characters that are not safe to embed in a report filename.
_UNSAFE_FILENAME_RE = re.compile(r'[^A-Za-z0-9._-]')

# Lines of the model's answer containing any of these substrings are dropped.
_FILTER_WORDS = (
    'undetermined',
    'unknown',
    'limited',
    'not available',
    'needs',
    'requires',
    'unclear',
)

# Section titles that format_report renders as highlighted headings.
_SECTION_TITLES = (
    "Professional Background",
    "Activity Analysis",
    "Technical Portfolio",
    "Community Engagement",
)

# Instruction given to Firecrawl describing what to extract from the profile.
_EXTRACTION_PROMPT = (
    "Extract key GitHub profile data:\n"
    "- Basic profile information (company, location, bio)\n"
    "- Repository list and details\n"
    "- Contribution statistics\n"
    "- Recent activity\n"
    "- Social stats"
)

# Prompt sent to Gemini; ``{profile_json}`` is the only placeholder.
_ANALYSIS_PROMPT_TEMPLATE = """
Analyze this GitHub profile and provide detailed insights from the available data.
Focus on concrete information and metrics.

Structure your response in these sections:

1. Professional Background
- Current company/organization (if available)
- Role/position (if available)
- Professional website or blog links
- Location (if available)

2. Activity Analysis
- Total repositories and forks
- Most active repositories (top 3)
- Contribution frequency
- Recent activity trends
- Streak information

3. Technical Portfolio
- Primary programming languages
- Most used technologies/frameworks
- Top contributed repositories
- Notable project themes

4. Community Engagement
- Followers and following count
- Public contributions
- Pull requests and issues
- Project collaborations

Rules:
- Include only verifiable information from the profile
- List specific repository names and their purposes
- Include contribution statistics where available
- Focus on recent activity (last 6 months)
- Skip sections only if completely unavailable

Profile Data:
{profile_json}
"""

# Load environment variables
load_dotenv()

# Initialize clients
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")

if not firecrawl_api_key:
    print(f"{Colors.RED}{Emojis.WARNING} Warning: FIRECRAWL_API_KEY not found in environment variables{Colors.RESET}")


def print_header(text: str, emoji: str, color: str = Colors.BLUE) -> None:
    """Print a formatted section header with emoji.

    Args:
        text: Header text; centered within the banner.
        emoji: Glyph printed on both sides of the text.
        color: ANSI color sequence applied to the header line.
    """
    width = 70
    print("\n" + "═" * width)
    print(f"{color}{Colors.BOLD}{emoji} {text.center(width-4)} {emoji}{Colors.RESET}")
    print("═" * width + "\n")


def print_section(title: str, content: str, emoji: str) -> None:
    """Print a formatted section with title, content, and emoji.

    Args:
        title: Section title, printed in bold cyan after the emoji.
        content: Section body, printed as-is on the following line.
        emoji: Glyph printed before the title.
    """
    print(f"\n{Colors.CYAN}{Colors.BOLD}{emoji} {title}{Colors.RESET}")
    print(f"{content}")


def poll_extraction_result(
    extraction_id: str,
    api_key: str,
    interval: float = 2,
    max_attempts: int = 15,
) -> Optional[Any]:
    """Poll Firecrawl API for extraction results with shorter intervals.

    Args:
        extraction_id: Job id returned by the initial extract request.
        api_key: Firecrawl API key sent as a bearer token.
        interval: Seconds to sleep between polls while the job is pending.
        max_attempts: Maximum number of status requests before giving up.

    Returns:
        The ``data`` field of the response once it is non-empty, or ``None``
        on API error, job failure/cancellation, unexpected exception, or
        when ``max_attempts`` is exhausted.
    """
    url = f"https://api.firecrawl.dev/v1/extract/{extraction_id}"
    headers = {'Authorization': f'Bearer {api_key}'}

    print(f"{Colors.YELLOW}Processing profile data...{Colors.RESET}")

    for attempt in range(max_attempts):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            data = response.json()

            if data.get('success') and data.get('data'):
                print(f"{Colors.GREEN}Data extracted successfully!{Colors.RESET}")
                return data['data']

            # A job reported as failed or cancelled will never produce data;
            # stop immediately instead of polling until the attempts run out.
            status = data.get('status')
            if status in ('failed', 'cancelled'):
                print(f"\n{Colors.RED}Extraction {status}: {data.get('error', 'Unknown error')}{Colors.RESET}")
                return None

            if data.get('success'):
                if attempt % 3 == 0:  # Print progress less frequently
                    print(".", end="", flush=True)
                time.sleep(interval)
            else:
                print(f"\n{Colors.RED}API Error: {data.get('error', 'Unknown error')}{Colors.RESET}")
                return None

        except requests.exceptions.Timeout:
            # A timed-out request still consumes one attempt.
            print(f"\n{Colors.RED}Request timed out. Retrying...{Colors.RESET}")
            continue
        except Exception as e:
            # Best-effort boundary: report the problem and signal failure.
            print(f"\n{Colors.RED}Error polling results: {e}{Colors.RESET}")
            return None

    print(f"\n{Colors.RED}Extraction timed out after {max_attempts} attempts.{Colors.RESET}")
    return None


def extract_github_profile(username: str, api_key: Optional[str]) -> Optional[Any]:
    """Extract GitHub profile data using Firecrawl with optimized settings.

    Args:
        username: GitHub username whose public profile page is extracted.
        api_key: Firecrawl API key; when falsy the function returns early.

    Returns:
        The extracted data as returned by ``poll_extraction_result``, or
        ``None`` when the key is missing, the request fails, or the API
        reports an error.
    """
    if not api_key:
        print(f"{Colors.RED}Error: Firecrawl API key is missing{Colors.RESET}")
        return None

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }

    # Percent-encode the user-supplied name so characters such as '/', '?'
    # or '#' cannot alter the target URL.
    github_url = f"https://github.com/{quote(username, safe='')}"

    # Simplified prompt for faster extraction
    payload = {
        "urls": [github_url],
        "prompt": _EXTRACTION_PROMPT,
        "enableWebSearch": False,
    }

    try:
        print(f"{Colors.YELLOW}Starting extraction for: {username}{Colors.RESET}")
        response = requests.post(
            "https://api.firecrawl.dev/v1/extract",
            headers=headers,
            json=payload,
            timeout=15,
        )

        if response.status_code != 200:
            print(f"{Colors.RED}API Error ({response.status_code}): {response.text}{Colors.RESET}")
            return None

        data = response.json()
        if not data.get('success'):
            print(f"{Colors.RED}API Error: {data.get('error', 'Unknown error')}{Colors.RESET}")
            return None

        extraction_id = data.get('id')
        if not extraction_id:
            print(f"{Colors.RED}No extraction ID received{Colors.RESET}")
            return None

        return poll_extraction_result(extraction_id, api_key)

    except requests.exceptions.Timeout:
        print(f"{Colors.RED}Initial request timed out{Colors.RESET}")
        return None
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure.
        print(f"{Colors.RED}Extraction failed: {e}{Colors.RESET}")
        return None


def analyze_with_gemini(profile_data: Any, username: str) -> Optional[str]:
    """Use Gemini to analyze GitHub profile data with focus on comprehensive insights.

    Args:
        profile_data: JSON-serializable profile data to embed in the prompt.
        username: GitHub username, used only for the report heading.

    Returns:
        The formatted report produced by ``format_report``, or ``None`` when
        the model call fails or returns no text.
    """
    prompt = _ANALYSIS_PROMPT_TEMPLATE.format(
        profile_json=json.dumps(profile_data, indent=2)
    )

    try:
        response = client.models.generate_content(
            model="gemini-2.0-flash",
            contents=prompt,
        )

        raw_text = response.text
        if not raw_text:
            # Report a missing answer explicitly rather than failing later
            # with an attribute error on None.
            print(f"{Colors.RED}Analysis failed: model returned no text{Colors.RESET}")
            return None

        # Clean up response: drop blank lines and any line that contains one
        # of the filter words. NOTE(review): this is a substring match, so a
        # line containing e.g. "unlimited" is dropped as well.
        kept_lines = [
            line
            for line in raw_text.strip().split('\n')
            if line.strip()
            and not any(word in line.lower() for word in _FILTER_WORDS)
        ]
        cleaned_analysis = '\n'.join(kept_lines)

        return format_report(cleaned_analysis, username)

    except Exception as e:
        # Best-effort boundary: report the problem and signal failure.
        print(f"{Colors.RED}Analysis failed: {e}{Colors.RESET}")
        return None


def format_report(raw_analysis: str, username: str) -> str:
    """Format the analysis into a clean, professional report.

    Each non-empty input line is stripped and then rendered as follows:
    a line containing one of the known section titles becomes a bold blue
    heading; a line starting with ``-`` becomes a ``•`` bullet; a line
    starting with ``#`` or ``•`` is omitted; any other line is emitted with
    a one-space indent.

    Args:
        raw_analysis: Newline-separated analysis text.
        username: GitHub username shown in the report heading.

    Returns:
        The report text, including ANSI color sequences.
    """
    rule = '─' * 40
    parts = [
        f"\n{Colors.BOLD}GitHub Profile Analysis: {username}{Colors.RESET}\n"
        f"{Colors.CYAN}{rule}{Colors.RESET}\n"
    ]

    for raw_line in raw_analysis.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if any(title in line for title in _SECTION_TITLES):
            parts.append(f"\n{Colors.BOLD}{Colors.BLUE}{line}{Colors.RESET}\n")
        elif line.startswith('-'):
            parts.append(f"• {line[1:].strip()}\n")
        elif not line.startswith(('#', '•')):
            # NOTE(review): lines beginning with '#' or '•' are intentionally
            # left out here, matching the existing behavior — confirm that
            # dropping '•' lines is desired.
            parts.append(f" {line}\n")

    return ''.join(parts)


def save_report(report: str, username: str) -> Optional[str]:
    """Save the report to a file.

    The file is written to the current working directory as
    ``github_analysis_<username>_<YYYYmmdd_HHMMSS>.txt`` with ANSI color
    sequences removed.

    Args:
        report: Report text, possibly containing ANSI color sequences.
        username: GitHub username embedded in the filename; characters other
            than letters, digits, ``.``, ``_`` and ``-`` are replaced by
            ``_`` so the name cannot contain path separators.

    Returns:
        The filename written, or ``None`` if saving failed.
    """
    safe_username = _UNSAFE_FILENAME_RE.sub('_', username)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"github_analysis_{safe_username}_{timestamp}.txt"

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            # Strip ANSI color codes when saving to file
            f.write(_ANSI_ESCAPE_RE.sub('', report))
        return filename
    except Exception as e:
        print(f"{Colors.RED}{Emojis.WARNING} Error saving report: {e}{Colors.RESET}")
        return None


def main() -> None:
    """Prompt for a GitHub username, run the analysis, and print the report."""
    username = input(f"{Colors.GREEN}GitHub username: {Colors.RESET}").strip()

    if not username:
        print(f"{Colors.RED}Please provide a valid username.{Colors.RESET}")
        return

    print("Analyzing profile...")
    profile_data = extract_github_profile(username, firecrawl_api_key)

    if not profile_data:
        print(f"{Colors.RED}Profile analysis failed.{Colors.RESET}")
        return

    report = analyze_with_gemini(profile_data, username)

    if report:
        print(report)
    else:
        print(f"{Colors.RED}Could not generate insights.{Colors.RESET}")


if __name__ == "__main__":
    main()