From 732e6af8b9ed9734f68e5d58ac8525ecd0c75acc Mon Sep 17 00:00:00 2001 From: Eric Ciarla Date: Fri, 23 Aug 2024 15:49:41 -0400 Subject: [PATCH 01/14] Add internal link opportunities example --- .../find_internal_link_opportunites.ipynb | 509 ++++++++++++++++++ 1 file changed, 509 insertions(+) create mode 100644 examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb diff --git a/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb b/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb new file mode 100644 index 00000000..d6168878 --- /dev/null +++ b/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb @@ -0,0 +1,509 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import datetime\n", + "import time\n", + "from firecrawl import FirecrawlApp\n", + "import json\n", + "import anthropic\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Retrieve API keys from environment variables\n", + "anthropic_api_key = os.getenv(\"ANTHROPIC_API_KEY\") or \"\"\n", + "firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\") or \"\"\n", + "# Set variables\n", + "blog_url=\"https://mendable.ai/blog\"\n", + "\n", + "# Set up anthropic client\n", + "client = anthropic.Anthropic(\n", + " api_key=anthropic_api_key,\n", + ")\n", + "\n", + "# Initialize the FirecrawlApp with your API key\n", + "app = FirecrawlApp(api_key=firecrawl_api_key)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Crawl a website\n", + "params = {\n", + " 'crawlOptions': {\n", + " 'limit': 100\n", + " },\n", + " \"pageOptions\": {\n", + " \"onlyMainContent\": True\n", + " }\n", + "}\n", + "crawl_result = app.crawl_url(blog_url, params=params)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting potential links from crawl_result:\n", + "Collected 36 potential links:\n", + "URL: https://mendable.ai/blog/coachgtm-mongodb, Title: Meet MongoDBs CoachGTM.ai\n", + "URL: https://mendable.ai/blog/building-safe-rag, Title: Building Safe RAG systems with the LLM OWASP top 10\n", + "URL: https://mendable.ai/blog/gdpr-repository-pattern, Title: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n", + "URL: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions, Title: How Mendable leverages Langsmith to debug Tools & Actions\n", + "URL: https://mendable.ai/blog/european-data-storage, Title: Launching European Data Storage powered by MongoDB\n", + "URL: https://mendable.ai/blog/tools, Title: Introducing Tools and Actions\n", + "URL: https://mendable.ai/blog/december_update, Title: Mendable.ai December Recap\n", + "URL: https://mendable.ai/blog/november_update, Title: Mendable.ai November Update\n", + "URL: https://mendable.ai/blog/october-recap, Title: Mendable.ai October Recap\n", + "URL: https://mendable.ai/blog/midseptemberupdate, Title: Mendable.ai Mid September 2023 Update\n", + "URL: https://mendable.ai/blog/getting-started, Title: Everything you need to know about Mendable: Build and deploy AI Chat Search\n", + "URL: https://mendable.ai/blog/building-copilots, Title: Building context-aware AI copilots with Mendable\n", + "URL: https://mendable.ai/blog/august2023update, Title: Mendable.ai August 2023 Updates\n", + "URL: https://mendable.ai/blog/finetuning-gpt35, Title: Early Insights Fine-Tuning GPT 3.5 from Mendable.ai\n", + "URL: https://mendable.ai/blog/gpt35prompting, Title: Improving GPT-3.5, Insights from Mendable.ai\n", + "URL: https://mendable.ai/blog/precisemode, Title: Introducing Precise Mode for Mendable.ai\n", + "URL: https://mendable.ai/blog/customprompt, Title: Customizing Your LLM Model on Mendable.ai\n", + "URL: https://mendable.ai/blog/mendable-launch, Title: Introducing Mendable.ai\n", + "URL: https://mendable.ai/blog/european-data-storage, Title: Launching European Data Storage powered by MongoDB\n", + "URL: https://mendable.ai/blog/customprompt, Title: Customizing Your LLM Model on Mendable.ai\n", + "URL: https://mendable.ai/blog/precisemode, Title: Introducing Precise Mode for Mendable.ai\n", + "URL: https://mendable.ai/blog/building-copilots, Title: Building context-aware AI copilots with Mendable\n", + "URL: https://mendable.ai/blog/coachgtm-mongodb, Title: Meet MongoDBs CoachGTM.ai\n", + "URL: https://mendable.ai/blog/building-safe-rag, Title: Building Safe RAG systems with the LLM OWASP top 10\n", + "URL: https://mendable.ai/blog/gdpr-repository-pattern, Title: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n", + "URL: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions, Title: How Mendable leverages Langsmith to debug Tools & Actions\n", + "URL: https://mendable.ai/blog/tools, Title: Introducing Tools and Actions\n", + "URL: https://mendable.ai/blog/december_update, Title: Mendable.ai December Recap\n", + "URL: https://mendable.ai/blog/november_update, Title: Mendable.ai November Update\n", + "URL: https://mendable.ai/blog/october-recap, Title: Mendable.ai October Recap\n", + "URL: https://mendable.ai/blog/midseptemberupdate, Title: Mendable.ai Mid September 2023 Update\n", + "URL: https://mendable.ai/blog/getting-started, Title: Everything you need to know about Mendable: Build and deploy AI Chat Search\n", + "URL: https://mendable.ai/blog/august2023update, Title: Mendable.ai August 2023 Updates\n", + "URL: https://mendable.ai/blog/finetuning-gpt35, Title: Early Insights Fine-Tuning GPT 3.5 from Mendable.ai\n", + "URL: https://mendable.ai/blog/gpt35prompting, Title: Improving GPT-3.5, Insights from Mendable.ai\n", + "URL: https://mendable.ai/blog/mendable-launch, Title: Introducing Mendable.ai\n" + ] + } + ], + "source": [ + "potential_links = []\n", + "\n", + "if crawl_result:\n", + " print(\"Collecting potential links from crawl_result:\")\n", + " \n", + " for item in crawl_result:\n", + " metadata = item[\"metadata\"]\n", + " og_url = metadata.get(\"ogUrl\")\n", + " title = metadata.get(\"title\")\n", + " if og_url and title and og_url != blog_url:\n", + " potential_links.append({\"url\": og_url, \"title\": title})\n", + " \n", + " print(f\"Collected {len(potential_links)} potential links:\")\n", + " for link in potential_links:\n", + " print(f\"URL: {link['url']}, Title: {link['title']}\")\n", + " \n", + "else:\n", + " print(\"crawl_result is empty or None\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Suggestion for: Meet MongoDBs CoachGTM.ai\n", + "Blog phrase: Mendable also provides a Tools\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Meet MongoDBs CoachGTM.ai\n", + "Blog phrase: MongoDB Atlas Vector Search to\n", + "Internal Link: https://mendable.ai/blog/european-data-storage\n", + "---\n", + "\n", + "\n", + "Suggestion for: Meet MongoDBs CoachGTM.ai\n", + "Blog phrase: By harnessing the power of\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n", + "Blog phrase: Advantages of RAG\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n", + "Blog phrase: Bring Your Model\n", + "Internal Link: https://mendable.ai/blog/customprompt\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n", + "Blog phrase: Garbage in, Garbage out\n", + "Internal Link: https://mendable.ai/blog/precisemode\n", + "---\n", + "\n", + "\n", + "Suggestion for: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n", + "Blog phrase: European data storage\n", + "Internal Link: https://mendable.ai/blog/european-data-storage\n", + "---\n", + "\n", + "\n", + "Suggestion for: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n", + "Blog phrase: delivering value\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n", + "Suggestion for: How Mendable leverages Langsmith to debug Tools & Actions\n", + "Blog phrase: introduction of Tools & Actions\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: How Mendable leverages Langsmith to debug Tools & Actions\n", + "Blog phrase: Mendable Tools & Actions\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Launching European Data Storage powered by MongoDB\n", + "Blog phrase: Clean Architecture and Repository pattern\n", + "Internal Link: https://mendable.ai/blog/gdpr-repository-pattern\n", + "---\n", + "\n", + "\n", + "Suggestion for: Launching European Data Storage powered by MongoDB\n", + "Blog phrase: building the best AI Chat\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Launching European Data Storage powered by MongoDB\n", + "Blog phrase: European RAG pipeline, powered by\n", + "Internal Link: https://mendable.ai/blog/building-safe-rag\n", + "---\n", + "\n", + "\n", + "Suggestion for: Introducing Tools and Actions\n", + "Blog phrase: augmentation and actions for automation\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Introducing Tools and Actions\n", + "Blog phrase: Mendable provides an API request\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n", + "Suggestion for: Introducing Tools and Actions\n", + "Blog phrase: AI use it when it\n", + "Internal Link: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai December Recap\n", + "Blog phrase: customizing the model\n", + "Internal Link: https://mendable.ai/blog/customprompt\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai December Recap\n", + "Blog phrase: AI sales copilot\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai December Recap\n", + "Blog phrase: Introducing Tools and Actions\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai November Update\n", + "Blog phrase: Auto syncing data sources\n", + "Internal Link: https://mendable.ai/blog/european-data-storage\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai November Update\n", + "Blog phrase: Chat insights feature\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai November Update\n", + "Blog phrase: Github private repo support\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai October Recap\n", + "Blog phrase: Full Prompt Customization\n", + "Internal Link: https://mendable.ai/blog/customprompt\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai October Recap\n", + "Blog phrase: Expanded Model Support\n", + "Internal Link: https://mendable.ai/blog/gpt35prompting\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai October Recap\n", + "Blog phrase: AI-Powered Documentation Management\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai Mid September 2023 Update\n", + "Blog phrase: new integration templates\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai Mid September 2023 Update\n", + "Blog phrase: Product Copilot feature\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai Mid September 2023 Update\n", + "Blog phrase: Data Exporting\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n", + "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n", + "Blog phrase: robust API\n", + "Internal Link: https://mendable.ai/blog/tools\n", + "---\n", + "\n", + "\n", + "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n", + "Blog phrase: pre-built components\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n", + "Blog phrase: Customizing Your LLM Model\n", + "Internal Link: https://mendable.ai/blog/customprompt\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building context-aware AI copilots with Mendable\n", + "Blog phrase: registered on our platform\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building context-aware AI copilots with Mendable\n", + "Blog phrase: dynamic context to the AI\n", + "Internal Link: https://mendable.ai/blog/customprompt\n", + "---\n", + "\n", + "\n", + "Suggestion for: Building context-aware AI copilots with Mendable\n", + "Blog phrase: personalized answers to your users\n", + "Internal Link: https://mendable.ai/blog/precisemode\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai August 2023 Updates\n", + "Blog phrase: Learn more about how to\n", + "Internal Link: https://mendable.ai/blog/precisemode\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai August 2023 Updates\n", + "Blog phrase: Building context-aware AI copilots with\n", + "Internal Link: https://mendable.ai/blog/building-copilots\n", + "---\n", + "\n", + "\n", + "Suggestion for: Mendable.ai August 2023 Updates\n", + "Blog phrase: customizable AI chat components\n", + "Internal Link: https://mendable.ai/blog/getting-started\n", + "---\n", + "\n", + "\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 57\u001b[0m\n\u001b[1;32m 27\u001b[0m prompt_instructions \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\u001b[38;5;124mGiven this blog post from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcurrent_blog_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m called \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcurrent_blog_title\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, analyze the following blog content. Identify 0 to 3 of phrases (5 words max) from the inside of the middle of the article that could be linked to other blog posts from the list of potential links provided inside of . Return a JSON object structured as follows:\u001b[39m\n\u001b[1;32m 28\u001b[0m \n\u001b[1;32m 29\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;130;01m{{\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 53\u001b[0m \n\u001b[1;32m 54\u001b[0m \u001b[38;5;124mGO AND ONLY RETURN THE JSON NOTHING ELSE:\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 57\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclaude-3-5-sonnet-20240620\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 59\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1024\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrole\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcontent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompt_instructions\u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;66;03m# Extract the JSON string from the TextBlock\u001b[39;00m\n\u001b[1;32m 66\u001b[0m json_string \u001b[38;5;241m=\u001b[39m message\u001b[38;5;241m.\u001b[39mcontent[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mtext\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/anthropic/_utils/_utils.py:277\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 275\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 277\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/anthropic/resources/messages.py:904\u001b[0m, in \u001b[0;36mMessages.create\u001b[0;34m(self, max_tokens, messages, model, metadata, stop_sequences, stream, system, temperature, tool_choice, tools, top_k, top_p, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 870\u001b[0m \u001b[38;5;129m@required_args\u001b[39m([\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m], [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate\u001b[39m(\n\u001b[1;32m 872\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 902\u001b[0m timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m600\u001b[39m,\n\u001b[1;32m 903\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Message \u001b[38;5;241m|\u001b[39m Stream[RawMessageStreamEvent]:\n\u001b[0;32m--> 904\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 905\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/v1/messages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 906\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 907\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 908\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmax_tokens\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 909\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 910\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 911\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmetadata\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 912\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstop_sequences\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop_sequences\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 913\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 914\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msystem\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msystem\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 915\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtemperature\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 917\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtop_k\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtop_p\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 921\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage_create_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mMessageCreateParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 922\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 923\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 924\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 925\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 926\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mMessage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 927\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 928\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mRawMessageStreamEvent\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 929\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/anthropic/_base_client.py:1249\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1235\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1237\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1244\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1245\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1246\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1247\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1248\u001b[0m )\n\u001b[0;32m-> 1249\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/anthropic/_base_client.py:931\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrequest\u001b[39m(\n\u001b[1;32m 923\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 924\u001b[0m cast_to: Type[ResponseT],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 929\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 930\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[0;32m--> 931\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 932\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 934\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 935\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 936\u001b[0m \u001b[43m \u001b[49m\u001b[43mremaining_retries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mremaining_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 937\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/anthropic/_base_client.py:962\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 959\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSending HTTP Request: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, request\u001b[38;5;241m.\u001b[39mmethod, request\u001b[38;5;241m.\u001b[39murl)\n\u001b[1;32m 961\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 962\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 963\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 964\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_should_stream_response_body\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 965\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 966\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 967\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m httpx\u001b[38;5;241m.\u001b[39mTimeoutException \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 968\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEncountered httpx.TimeoutException\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpx/_client.py:901\u001b[0m, in \u001b[0;36mClient.send\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m 893\u001b[0m follow_redirects \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfollow_redirects\n\u001b[1;32m 895\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[1;32m 896\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m follow_redirects\n\u001b[1;32m 897\u001b[0m )\n\u001b[1;32m 899\u001b[0m auth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_request_auth(request, auth)\n\u001b[0;32m--> 901\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_auth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 902\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 903\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 904\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 905\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 906\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 907\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m stream:\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpx/_client.py:929\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m 926\u001b[0m request \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(auth_flow)\n\u001b[1;32m 928\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 929\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_redirects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 930\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 931\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 932\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhistory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 935\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpx/_client.py:966\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_event_hooks[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 964\u001b[0m hook(request)\n\u001b[0;32m--> 966\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_single_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 967\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 968\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m hook \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_event_hooks[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresponse\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpx/_client.py:1002\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 998\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 999\u001b[0m )\n\u001b[1;32m 1001\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m request_context(request\u001b[38;5;241m=\u001b[39mrequest):\n\u001b[0;32m-> 1002\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mtransport\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response\u001b[38;5;241m.\u001b[39mstream, SyncByteStream)\n\u001b[1;32m 1006\u001b[0m response\u001b[38;5;241m.\u001b[39mrequest \u001b[38;5;241m=\u001b[39m request\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpx/_transports/default.py:228\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 215\u001b[0m req \u001b[38;5;241m=\u001b[39m httpcore\u001b[38;5;241m.\u001b[39mRequest(\n\u001b[1;32m 216\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[1;32m 217\u001b[0m url\u001b[38;5;241m=\u001b[39mhttpcore\u001b[38;5;241m.\u001b[39mURL(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m extensions\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mextensions,\n\u001b[1;32m 226\u001b[0m )\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m--> 228\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Response(\n\u001b[1;32m 233\u001b[0m status_code\u001b[38;5;241m=\u001b[39mresp\u001b[38;5;241m.\u001b[39mstatus,\n\u001b[1;32m 234\u001b[0m headers\u001b[38;5;241m=\u001b[39mresp\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[1;32m 235\u001b[0m stream\u001b[38;5;241m=\u001b[39mResponseStream(resp\u001b[38;5;241m.\u001b[39mstream),\n\u001b[1;32m 236\u001b[0m extensions\u001b[38;5;241m=\u001b[39mresp\u001b[38;5;241m.\u001b[39mextensions,\n\u001b[1;32m 237\u001b[0m )\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/connection_pool.py:268\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ShieldCancellation():\n\u001b[1;32m 267\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponse_closed(status)\n\u001b[0;32m--> 268\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/connection_pool.py:251\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 251\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[1;32m 253\u001b[0m \u001b[38;5;66;03m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[1;32m 254\u001b[0m \u001b[38;5;66;03m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# up as HTTP/1.1.\u001b[39;00m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pool_lock:\n\u001b[1;32m 261\u001b[0m \u001b[38;5;66;03m# Maintain our position in the request queue, but reset the\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;66;03m# status so that the request becomes queued again.\u001b[39;00m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/connection.py:103\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39mis_available():\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/http11.py:133\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresponse_closed\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_response_closed()\n\u001b[0;32m--> 133\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/http11.py:111\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\n\u001b[1;32m 104\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreceive_response_headers\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request, kwargs\n\u001b[1;32m 105\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[1;32m 106\u001b[0m (\n\u001b[1;32m 107\u001b[0m http_version,\n\u001b[1;32m 108\u001b[0m status,\n\u001b[1;32m 109\u001b[0m reason_phrase,\n\u001b[1;32m 110\u001b[0m headers,\n\u001b[0;32m--> 111\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_receive_response_headers\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 112\u001b[0m trace\u001b[38;5;241m.\u001b[39mreturn_value \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 113\u001b[0m http_version,\n\u001b[1;32m 114\u001b[0m status,\n\u001b[1;32m 115\u001b[0m reason_phrase,\n\u001b[1;32m 116\u001b[0m headers,\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m Response(\n\u001b[1;32m 120\u001b[0m status\u001b[38;5;241m=\u001b[39mstatus,\n\u001b[1;32m 121\u001b[0m headers\u001b[38;5;241m=\u001b[39mheaders,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 127\u001b[0m },\n\u001b[1;32m 128\u001b[0m )\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/http11.py:176\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 173\u001b[0m timeout \u001b[38;5;241m=\u001b[39m timeouts\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mread\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 175\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 176\u001b[0m event \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_receive_event\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(event, h11\u001b[38;5;241m.\u001b[39mResponse):\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_sync/http11.py:212\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 209\u001b[0m event \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_h11_state\u001b[38;5;241m.\u001b[39mnext_event()\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m event \u001b[38;5;129;01mis\u001b[39;00m h11\u001b[38;5;241m.\u001b[39mNEED_DATA:\n\u001b[0;32m--> 212\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_network_stream\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mREAD_NUM_BYTES\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;66;03m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;66;03m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[38;5;66;03m# perspective. Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# it as a ConnectError.\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data \u001b[38;5;241m==\u001b[39m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_h11_state\u001b[38;5;241m.\u001b[39mtheir_state \u001b[38;5;241m==\u001b[39m h11\u001b[38;5;241m.\u001b[39mSEND_RESPONSE:\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/httpcore/_backends/sync.py:126\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[0;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sock\u001b[38;5;241m.\u001b[39msettimeout(timeout)\n\u001b[0;32m--> 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmax_bytes\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/ssl.py:1292\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[0;34m(self, buflen, flags)\u001b[0m\n\u001b[1;32m 1288\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1289\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1290\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m 1291\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1292\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuflen\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mrecv(buflen, flags)\n", + "File \u001b[0;32m~/projects/python_projects/agents_testing/.conda/lib/python3.10/ssl.py:1165\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sslobj\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mlen\u001b[39m, buffer)\n\u001b[1;32m 1164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1166\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SSLError \u001b[38;5;28;01mas\u001b[39;00m x:\n\u001b[1;32m 1167\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x\u001b[38;5;241m.\u001b[39margs[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m SSL_ERROR_EOF \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msuppress_ragged_eofs:\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "import json\n", + "import csv\n", + "\n", + "# Assuming we have the following variables from the previous code:\n", + "# crawl_result, client, potential_links\n", + "\n", + "# Convert potential_links to a JSON string\n", + "potential_links_json = json.dumps(potential_links, indent=2)\n", + "\n", + "# Prepare CSV file\n", + "csv_filename = \"link_suggestions.csv\"\n", + "csv_headers = [\"Source Blog Title\", \"Source Blog URL\", \"Target Phrase\", \"Suggested Link URL\"]\n", + "\n", + "# Write headers to the CSV file\n", + "with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:\n", + " csvwriter = csv.writer(csvfile)\n", + " csvwriter.writerow(csv_headers)\n", + "\n", + "# Loop through each blog post content\n", + "for item in crawl_result:\n", + " current_blog_url = item[\"metadata\"].get(\"ogUrl\", \"\")\n", + " if current_blog_url == blog_url:\n", + " continue\n", + " current_blog_content = item[\"content\"]\n", + " current_blog_title = item[\"metadata\"].get(\"title\", \"\")\n", + "\n", + " prompt_instructions = f\"\"\"Given this blog post from {current_blog_url} called '{current_blog_title}', analyze the following blog content. Identify 0 to 3 of phrases (5 words max) from the inside of the middle of the article that could be linked to other blog posts from the list of potential links provided inside of . Return a JSON object structured as follows:\n", + "\n", + " {{\n", + " \"link_suggestions\": [\n", + " {{\n", + " \"target_phrase\": \"the EXACT phrase from the to be linked to one of the links in (5 words max)\",\n", + " \"suggested_link_url\": \"url of the suggested internal link from \",\n", + " }}\n", + " ],\n", + " \"metadata\": {{\n", + " \"source_blog_url\": \"{current_blog_url}\",\n", + " \"source_blog_title\": \"{current_blog_title}\",\n", + " }}\n", + " }}\n", + "\n", + " Ensure that you provide the EXACT phrase from in target_phrase (5 words max) to locate each suggestion in the blog content without using character positions. Your target phrases must NOT be a title!\n", + "\n", + " Blog Content:\n", + " \n", + " {current_blog_content}\n", + " \n", + "\n", + " Potential Links:\n", + " \n", + " {potential_links_json}\n", + " \n", + "\n", + " GO AND ONLY RETURN THE JSON NOTHING ELSE:\"\"\"\n", + "\n", + " try:\n", + " message = client.messages.create(\n", + " model=\"claude-3-5-sonnet-20240620\",\n", + " max_tokens=1024,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": prompt_instructions}\n", + " ]\n", + " )\n", + " \n", + " # Extract the JSON string from the TextBlock\n", + " json_string = message.content[0].text\n", + " \n", + " # Parse the JSON response\n", + " response_json = json.loads(json_string)\n", + " \n", + " # Write suggestions to CSV\n", + " for suggestion in response_json['link_suggestions']:\n", + " print(\"Suggestion for: \" + current_blog_title )\n", + " print(\"Blog phrase: \" + suggestion['target_phrase']) \n", + " print(\"Internal Link: \" + suggestion['suggested_link_url'])\n", + " print(\"---\\n\\n\")\n", + "\n", + " # Open the CSV file in append mode and write the new row\n", + " with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:\n", + " csvwriter = csv.writer(csvfile)\n", + " csvwriter.writerow([\n", + " response_json['metadata']['source_blog_title'],\n", + " response_json['metadata']['source_blog_url'],\n", + " suggestion['target_phrase'],\n", + " suggestion['suggested_link_url'],\n", + " ])\n", + " \n", + " except json.JSONDecodeError:\n", + " print(f\"Error parsing JSON response for blog {current_blog_title}\")\n", + " print(\"Raw response:\", message.content)\n", + " except Exception as e:\n", + " print(f\"Error processing blog {current_blog_title}: {str(e)}\")\n", + " \n", + "\n", + "print(f\"Finished processing all blog posts. Results saved to {csv_filename}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 173f4ee1bf0b75ff7582a0fa1756aa35eba27051 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 20:09:59 -0300 Subject: [PATCH 02/14] Nick: chrome cdp main | simple autoscaler --- .github/workflows/autoscale.yml | 36 ++++++ apps/api/src/controllers/admin/queue.ts | 115 ++++++++++++++---- apps/api/src/routes/admin.ts | 6 + apps/api/src/scraper/WebScraper/single_url.ts | 4 +- 4 files changed, 135 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/autoscale.yml diff --git a/.github/workflows/autoscale.yml b/.github/workflows/autoscale.yml new file mode 100644 index 00000000..6ab7ca74 --- /dev/null +++ b/.github/workflows/autoscale.yml @@ -0,0 +1,36 @@ +name: Simple Autoscaler +on: + schedule: + - cron: '*/0.5 * * * *' + +env: + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + +jobs: + scale: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: superfly/flyctl-actions/setup-flyctl@master + - name: Send GET request to check queues + run: | + response=$(curl --silent --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/autoscaler) + http_code=$(echo "$response" | jq -r '.status_code') + + mode=$(echo "$response" | jq -r '.mode') + count=$(echo "$response" | jq -r '.count') + + echo "Mode: $mode" + echo "Count: $count" + + if [ "$mode" = "scale-descale" ]; then + flyctl scale count $count -c fly.staging.toml --process-group=worker --yes + echo "Scaled to $count machines." + else + echo "No scaling needed. Mode: $mode" + fi + env: + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + working-directory: apps/api diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 095e7ca7..729ea004 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -4,6 +4,7 @@ import { Job } from "bullmq"; import { Logger } from "../../lib/logger"; import { getScrapeQueue } from "../../services/queue-service"; import { checkAlerts } from "../../services/alerts"; +import { exec } from "node:child_process"; export async function cleanBefore24hCompleteJobsController( req: Request, @@ -54,34 +55,100 @@ export async function cleanBefore24hCompleteJobsController( } } - export async function checkQueuesController(req: Request, res: Response) { - try { - await checkAlerts(); - return res.status(200).send("Alerts initialized"); - } catch (error) { - Logger.debug(`Failed to initialize alerts: ${error}`); - return res.status(500).send("Failed to initialize alerts"); - } + try { + await checkAlerts(); + return res.status(200).send("Alerts initialized"); + } catch (error) { + Logger.debug(`Failed to initialize alerts: ${error}`); + return res.status(500).send("Failed to initialize alerts"); } +} - // Use this as a "health check" that way we dont destroy the server +// Use this as a "health check" that way we dont destroy the server export async function queuesController(req: Request, res: Response) { - try { - const scrapeQueue = getScrapeQueue(); + try { + const scrapeQueue = getScrapeQueue(); - const [webScraperActive] = await Promise.all([ - scrapeQueue.getActiveCount(), - ]); + const [webScraperActive] = await Promise.all([ + scrapeQueue.getActiveCount(), + ]); - const noActiveJobs = webScraperActive === 0; - // 200 if no active jobs, 503 if there are active jobs - return res.status(noActiveJobs ? 200 : 500).json({ - webScraperActive, - noActiveJobs, - }); - } catch (error) { - Logger.error(error); - return res.status(500).json({ error: error.message }); + const noActiveJobs = webScraperActive === 0; + // 200 if no active jobs, 503 if there are active jobs + return res.status(noActiveJobs ? 200 : 500).json({ + webScraperActive, + noActiveJobs, + }); + } catch (error) { + Logger.error(error); + return res.status(500).json({ error: error.message }); + } +} + +export async function autoscalerController(req: Request, res: Response) { + try { + const maxNumberOfMachines = 100; + const minNumberOfMachines = 20; + + const scrapeQueue = getScrapeQueue(); + + const [webScraperActive, webScraperWaiting, webScraperPriority] = await Promise.all([ + scrapeQueue.getActiveCount(), + scrapeQueue.getWaitingCount(), + scrapeQueue.getPrioritizedCount(), + ]); + + let waitingAndPriorityCount = webScraperWaiting + webScraperPriority; + + // get number of machines active + const request = await fetch('https://api.machines.dev/v1/apps/firecrawl-scraper-js/machines', + { + headers: { + 'Authorization': `Bearer ${process.env.FLY_API_TOKEN}` + } + } + ) + const machines = await request.json(); + const activeMachines = machines.filter(machine => machine.state === 'started' || machine.state === "starting").length; + + let targetMachineCount = activeMachines; + + const baseScaleUp = 10; + const baseScaleDown = 5; + + // Scale up logic + if (webScraperActive > 9000 || waitingAndPriorityCount > 2000) { + targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + (baseScaleUp * 3)); + } else if (webScraperActive > 5000 || waitingAndPriorityCount > 1000) { + targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + (baseScaleUp * 2)); + } else if (webScraperActive > 1000 || waitingAndPriorityCount > 500) { + targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + baseScaleUp); } - } \ No newline at end of file + + // Scale down logic + if (webScraperActive < 100 && waitingAndPriorityCount < 50) { + targetMachineCount = Math.max(minNumberOfMachines, activeMachines - (baseScaleDown * 3)); + } else if (webScraperActive < 500 && waitingAndPriorityCount < 200) { + targetMachineCount = Math.max(minNumberOfMachines, activeMachines - (baseScaleDown * 2)); + } else if (webScraperActive < 1000 && waitingAndPriorityCount < 500) { + targetMachineCount = Math.max(minNumberOfMachines, activeMachines - baseScaleDown); + } + + if (targetMachineCount !== activeMachines) { + Logger.info(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`); + return res.status(200).json({ + mode: "scale-descale", + count: targetMachineCount, + }); + } + + return res.status(200).json({ + mode: "normal", + count: activeMachines, + }); + } catch (error) { + Logger.error(error); + return res.status(500).send("Failed to initialize autoscaler"); + } +} diff --git a/apps/api/src/routes/admin.ts b/apps/api/src/routes/admin.ts index 77d1bf46..d32808ce 100644 --- a/apps/api/src/routes/admin.ts +++ b/apps/api/src/routes/admin.ts @@ -1,6 +1,7 @@ import express from "express"; import { redisHealthController } from "../controllers/admin/redis-health"; import { + autoscalerController, checkQueuesController, cleanBefore24hCompleteJobsController, queuesController, @@ -27,3 +28,8 @@ adminRouter.get( `/admin/${process.env.BULL_AUTH_KEY}/queues`, queuesController ); + +adminRouter.get( + `/admin/${process.env.BULL_AUTH_KEY}/autoscaler`, + autoscalerController +); diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 1f2a62de..6998a665 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -24,8 +24,8 @@ import { clientSideError } from "../../strings"; dotenv.config(); export const baseScrapers = [ - "fire-engine", "fire-engine;chrome-cdp", + "fire-engine", "scrapingBee", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", "scrapingBeeLoad", @@ -85,8 +85,8 @@ function getScrapingFallbackOrder( }); let defaultOrder = [ - !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine", !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp", + !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine", "scrapingBee", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", "scrapingBeeLoad", From 0dc592b3e420f4979f173fadfd4f3e01769ce53a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 20:11:30 -0300 Subject: [PATCH 03/14] Update autoscale.yml --- .github/workflows/autoscale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/autoscale.yml b/.github/workflows/autoscale.yml index 6ab7ca74..189a1755 100644 --- a/.github/workflows/autoscale.yml +++ b/.github/workflows/autoscale.yml @@ -1,7 +1,7 @@ name: Simple Autoscaler on: schedule: - - cron: '*/0.5 * * * *' + - cron: '*/1 * * * *' env: BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} From b23bf2eef9a00673d22b01273a00383ccd235ee4 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 20:14:35 -0300 Subject: [PATCH 04/14] Update autoscale.yml --- .github/workflows/autoscale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/autoscale.yml b/.github/workflows/autoscale.yml index 189a1755..694c8cac 100644 --- a/.github/workflows/autoscale.yml +++ b/.github/workflows/autoscale.yml @@ -1,7 +1,7 @@ name: Simple Autoscaler on: schedule: - - cron: '*/1 * * * *' + - cron: '* * * * *' env: BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} From 28d7a637c21fd58908da37aba7c961e562e46923 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:07:49 -0300 Subject: [PATCH 05/14] Update queue.ts --- apps/api/src/controllers/admin/queue.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 729ea004..8ce12942 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -115,7 +115,8 @@ export async function autoscalerController(req: Request, res: Response) { let targetMachineCount = activeMachines; const baseScaleUp = 10; - const baseScaleDown = 5; + // Slow scale down + const baseScaleDown = 2; // Scale up logic if (webScraperActive > 9000 || waitingAndPriorityCount > 2000) { From 8e78511ed49a1b5a69c52117501943a3f53d1179 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:15:47 -0300 Subject: [PATCH 06/14] Update queue.ts --- apps/api/src/controllers/admin/queue.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 8ce12942..43bf2e98 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -110,7 +110,9 @@ export async function autoscalerController(req: Request, res: Response) { } ) const machines = await request.json(); - const activeMachines = machines.filter(machine => machine.state === 'started' || machine.state === "starting").length; + + // Only worker machines + const activeMachines = machines.filter(machine => (machine.state === 'started' || machine.state === "starting") && machine.config.env["FLY_PROCESS_GROUP"] === "worker").length; let targetMachineCount = activeMachines; From b9e06e27f400d2f7fdfdec6996398897722f24ad Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:17:27 -0300 Subject: [PATCH 07/14] Update queue.ts --- apps/api/src/controllers/admin/queue.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 43bf2e98..f3244d2c 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -88,7 +88,7 @@ export async function queuesController(req: Request, res: Response) { export async function autoscalerController(req: Request, res: Response) { try { - const maxNumberOfMachines = 100; + const maxNumberOfMachines = 80; const minNumberOfMachines = 20; const scrapeQueue = getScrapeQueue(); @@ -110,7 +110,7 @@ export async function autoscalerController(req: Request, res: Response) { } ) const machines = await request.json(); - + // Only worker machines const activeMachines = machines.filter(machine => (machine.state === 'started' || machine.state === "starting") && machine.config.env["FLY_PROCESS_GROUP"] === "worker").length; From d87b62fed903d3c7642bf353c1e7026d3d004d8d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:33:17 -0300 Subject: [PATCH 08/14] Nick: --- apps/api/src/controllers/admin/queue.ts | 7 +++++++ apps/api/src/services/alerts/slack.ts | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index f3244d2c..2923ebba 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -5,6 +5,7 @@ import { Logger } from "../../lib/logger"; import { getScrapeQueue } from "../../services/queue-service"; import { checkAlerts } from "../../services/alerts"; import { exec } from "node:child_process"; +import { sendSlackWebhook } from "../../services/alerts/slack"; export async function cleanBefore24hCompleteJobsController( req: Request, @@ -140,6 +141,12 @@ export async function autoscalerController(req: Request, res: Response) { if (targetMachineCount !== activeMachines) { Logger.info(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`); + + if(targetMachineCount > activeMachines) { + sendSlackWebhook("🐂 Scaling up to " + targetMachineCount + " machines", false, process.env.SLACK_AUTOSCALER ?? ""); + } else { + sendSlackWebhook("🐂 Scaling down to " + targetMachineCount + " machines", false, process.env.SLACK_AUTOSCALER ?? ""); + } return res.status(200).json({ mode: "scale-descale", count: targetMachineCount, diff --git a/apps/api/src/services/alerts/slack.ts b/apps/api/src/services/alerts/slack.ts index 96bf1c09..0fa75693 100644 --- a/apps/api/src/services/alerts/slack.ts +++ b/apps/api/src/services/alerts/slack.ts @@ -3,9 +3,9 @@ import { Logger } from "../../../src/lib/logger"; export async function sendSlackWebhook( message: string, - alertEveryone: boolean = false + alertEveryone: boolean = false, + webhookUrl: string = process.env.SLACK_WEBHOOK_URL ?? "" ) { - const webhookUrl = process.env.SLACK_WEBHOOK_URL; const messagePrefix = alertEveryone ? " " : ""; const payload = { text: `${messagePrefix} ${message}`, From b80277d4de1e9101482d4ec3856cfed05d1ed95e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:46:44 -0300 Subject: [PATCH 09/14] Update queue.ts --- apps/api/src/controllers/admin/queue.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 2923ebba..6a46cfec 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -113,7 +113,7 @@ export async function autoscalerController(req: Request, res: Response) { const machines = await request.json(); // Only worker machines - const activeMachines = machines.filter(machine => (machine.state === 'started' || machine.state === "starting") && machine.config.env["FLY_PROCESS_GROUP"] === "worker").length; + const activeMachines = machines.filter(machine => (machine.state === 'started' || machine.state === "starting" || machine.state === "replacing") && machine.config.env["FLY_PROCESS_GROUP"] === "worker").length; let targetMachineCount = activeMachines; @@ -143,9 +143,9 @@ export async function autoscalerController(req: Request, res: Response) { Logger.info(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`); if(targetMachineCount > activeMachines) { - sendSlackWebhook("🐂 Scaling up to " + targetMachineCount + " machines", false, process.env.SLACK_AUTOSCALER ?? ""); + sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`, false, process.env.SLACK_AUTOSCALER ?? ""); } else { - sendSlackWebhook("🐂 Scaling down to " + targetMachineCount + " machines", false, process.env.SLACK_AUTOSCALER ?? ""); + sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`, false, process.env.SLACK_AUTOSCALER ?? ""); } return res.status(200).json({ mode: "scale-descale", From 1f99bfd3c80df752a6ae0f0d2e505341e71b2255 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Fri, 23 Aug 2024 22:47:12 -0300 Subject: [PATCH 10/14] Update queue.ts --- apps/api/src/controllers/admin/queue.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts index 6a46cfec..06844bea 100644 --- a/apps/api/src/controllers/admin/queue.ts +++ b/apps/api/src/controllers/admin/queue.ts @@ -143,9 +143,9 @@ export async function autoscalerController(req: Request, res: Response) { Logger.info(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`); if(targetMachineCount > activeMachines) { - sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`, false, process.env.SLACK_AUTOSCALER ?? ""); + sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`, false, process.env.SLACK_AUTOSCALER ?? ""); } else { - sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`, false, process.env.SLACK_AUTOSCALER ?? ""); + sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`, false, process.env.SLACK_AUTOSCALER ?? ""); } return res.status(200).json({ mode: "scale-descale", From 4e196a9146b214b80476f7d143158b4bc4a2fe94 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 25 Aug 2024 01:48:51 -0300 Subject: [PATCH 11/14] Delete autoscale.yml --- .github/workflows/autoscale.yml | 36 --------------------------------- 1 file changed, 36 deletions(-) delete mode 100644 .github/workflows/autoscale.yml diff --git a/.github/workflows/autoscale.yml b/.github/workflows/autoscale.yml deleted file mode 100644 index 694c8cac..00000000 --- a/.github/workflows/autoscale.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Simple Autoscaler -on: - schedule: - - cron: '* * * * *' - -env: - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - -jobs: - scale: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: superfly/flyctl-actions/setup-flyctl@master - - name: Send GET request to check queues - run: | - response=$(curl --silent --max-time 180 https://api.firecrawl.dev/admin/${{ secrets.BULL_AUTH_KEY }}/autoscaler) - http_code=$(echo "$response" | jq -r '.status_code') - - mode=$(echo "$response" | jq -r '.mode') - count=$(echo "$response" | jq -r '.count') - - echo "Mode: $mode" - echo "Count: $count" - - if [ "$mode" = "scale-descale" ]; then - flyctl scale count $count -c fly.staging.toml --process-group=worker --yes - echo "Scaled to $count machines." - else - echo "No scaling needed. Mode: $mode" - fi - env: - FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} - BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} - working-directory: apps/api From 6f9a2687ae995d7ab43469bf3f74cf9008af7c80 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Sun, 25 Aug 2024 15:04:32 -0300 Subject: [PATCH 12/14] fixed turndown bug --- apps/api/src/lib/html-to-markdown.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts index 233da921..002cb7be 100644 --- a/apps/api/src/lib/html-to-markdown.ts +++ b/apps/api/src/lib/html-to-markdown.ts @@ -1,5 +1,5 @@ -export function parseMarkdown(html: string) { +export async function parseMarkdown(html: string) { var TurndownService = require("turndown"); var turndownPluginGfm = require('joplin-turndown-plugin-gfm') @@ -21,7 +21,27 @@ export function parseMarkdown(html: string) { }); var gfm = turndownPluginGfm.gfm; turndownService.use(gfm); - let markdownContent = turndownService.turndown(html); + let markdownContent = ""; + const turndownPromise = new Promise((resolve, reject) => { + try { + const result = turndownService.turndown(html); + resolve(result); + } catch (error) { + reject("Error converting HTML to Markdown: " + error); + } + }); + + const timeoutPromise = new Promise((resolve, reject) => { + const timeout = 5000; // Timeout in milliseconds + setTimeout(() => reject("Conversion timed out after " + timeout + "ms"), timeout); + }); + + try { + markdownContent = await Promise.race([turndownPromise, timeoutPromise]); + } catch (error) { + console.error(error); + return ""; // Optionally return an empty string or handle the error as needed + } // multiple line links let insideLinkContent = false; From d591e0f51c6a2bb7b58b59618e0d75a435ce5d05 Mon Sep 17 00:00:00 2001 From: Gergo Moricz Date: Sun, 25 Aug 2024 20:05:17 +0200 Subject: [PATCH 13/14] block corterix.com for performance issues --- apps/api/src/scraper/WebScraper/utils/blocklist.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 7b1ee19c..99eb6bd2 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -15,7 +15,8 @@ const socialMediaBlocklist = [ 'wechat.com', 'telegram.org', 'researchhub.com', - 'youtube.com' + 'youtube.com', + 'corterix.com', ]; const allowedKeywords = [ From 5606fe587068090bf1c7e55368b1e044e34f0b09 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 26 Aug 2024 16:05:11 -0300 Subject: [PATCH 14/14] Nick: --- apps/api/src/controllers/auth.ts | 3 ++- apps/api/src/services/rate-limiter.ts | 14 +++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index ac60dc53..151733c0 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -222,7 +222,8 @@ export async function supaAuthenticateUser( rateLimiter = getRateLimiter( RateLimiterMode.Scrape, token, - subscriptionData.plan + subscriptionData.plan, + teamId ); break; case RateLimiterMode.Search: diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index cd923c4c..d96da069 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -84,16 +84,28 @@ export const testSuiteRateLimiter = new RateLimiterRedis({ duration: 60, // Duration in seconds }); +export const devBRateLimiter = new RateLimiterRedis({ + storeClient: redisRateLimitClient, + keyPrefix: "dev-b", + points: 1200, + duration: 60, // Duration in seconds +}); + export function getRateLimiter( mode: RateLimiterMode, token: string, - plan?: string + plan?: string, + teamId?: string ) { if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673")) { return testSuiteRateLimiter; } + if(teamId === process.env.DEV_B_TEAM_ID) { + return devBRateLimiter; + } + const rateLimitConfig = RATE_LIMITS[mode]; // {default : 5} if (!rateLimitConfig) return serverRateLimiter;