diff --git a/apps/api/src/controllers/admin/queue.ts b/apps/api/src/controllers/admin/queue.ts
index 095e7ca7..06844bea 100644
--- a/apps/api/src/controllers/admin/queue.ts
+++ b/apps/api/src/controllers/admin/queue.ts
@@ -4,6 +4,8 @@ import { Job } from "bullmq";
 import { Logger } from "../../lib/logger";
 import { getScrapeQueue } from "../../services/queue-service";
 import { checkAlerts } from "../../services/alerts";
+import { exec } from "node:child_process";
+import { sendSlackWebhook } from "../../services/alerts/slack";
 
 export async function cleanBefore24hCompleteJobsController(
   req: Request,
@@ -54,34 +56,109 @@ export async function cleanBefore24hCompleteJobsController(
   }
 }
 
-export async function checkQueuesController(req: Request, res: Response) {
-    try {
-      await checkAlerts();
-      return res.status(200).send("Alerts initialized");
-    } catch (error) {
-      Logger.debug(`Failed to initialize alerts: ${error}`);
-      return res.status(500).send("Failed to initialize alerts");
-    }
+export async function checkQueuesController(req: Request, res: Response) {
+  try {
+    await checkAlerts();
+    return res.status(200).send("Alerts initialized");
+  } catch (error) {
+    Logger.debug(`Failed to initialize alerts: ${error}`);
+    return res.status(500).send("Failed to initialize alerts");
+  }
 }
 
-  // Use this as a "health check" that way we dont destroy the server
+// Use this as a "health check" that way we don't destroy the server
 export async function queuesController(req: Request, res: Response) {
-    try {
-      const scrapeQueue = getScrapeQueue();
+  try {
+    const scrapeQueue = getScrapeQueue();
 
-      const [webScraperActive] = await Promise.all([
-        scrapeQueue.getActiveCount(),
-      ]);
+    const [webScraperActive] = await Promise.all([
+      scrapeQueue.getActiveCount(),
+    ]);
 
-      const noActiveJobs = webScraperActive === 0;
-      // 200 if no active jobs, 503 if there are active jobs
-      return res.status(noActiveJobs ? 200 : 500).json({
-        webScraperActive,
-        noActiveJobs,
-      });
-    } catch (error) {
-      Logger.error(error);
-      return res.status(500).json({ error: error.message });
+    const noActiveJobs = webScraperActive === 0;
+    // 200 if no active jobs, 500 if there are active jobs
+    return res.status(noActiveJobs ? 200 : 500).json({
+      webScraperActive,
+      noActiveJobs,
+    });
+  } catch (error) {
+    Logger.error(error);
+    return res.status(500).json({ error: error.message });
+  }
+}
+
+export async function autoscalerController(req: Request, res: Response) {
+  try {
+    const maxNumberOfMachines = 80;
+    const minNumberOfMachines = 20;
+
+    const scrapeQueue = getScrapeQueue();
+
+    const [webScraperActive, webScraperWaiting, webScraperPriority] = await Promise.all([
+      scrapeQueue.getActiveCount(),
+      scrapeQueue.getWaitingCount(),
+      scrapeQueue.getPrioritizedCount(),
+    ]);
+
+    let waitingAndPriorityCount = webScraperWaiting + webScraperPriority;
+
+    // get number of machines active
+    const request = await fetch('https://api.machines.dev/v1/apps/firecrawl-scraper-js/machines',
+      {
+        headers: {
+          'Authorization': `Bearer ${process.env.FLY_API_TOKEN}`
+        }
+      }
+    )
+    const machines = await request.json();
+
+    // Only worker machines
+    const activeMachines = machines.filter(machine => (machine.state === 'started' || machine.state === "starting" || machine.state === "replacing") && machine.config.env["FLY_PROCESS_GROUP"] === "worker").length;
+
+    let targetMachineCount = activeMachines;
+
+    const baseScaleUp = 10;
+    // Slow scale down
+    const baseScaleDown = 2;
+
+    // Scale up logic
+    if (webScraperActive > 9000 || waitingAndPriorityCount > 2000) {
+      targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + (baseScaleUp * 3));
+    } else if (webScraperActive > 5000 || waitingAndPriorityCount > 1000) {
+      targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + (baseScaleUp * 2));
+    } else if (webScraperActive > 1000 || waitingAndPriorityCount > 500) {
+      targetMachineCount = Math.min(maxNumberOfMachines, activeMachines + baseScaleUp);
+    }
-  }
\ No newline at end of file
+
+    // Scale down logic
+    if (webScraperActive < 100 && waitingAndPriorityCount < 50) {
+      targetMachineCount = Math.max(minNumberOfMachines, activeMachines - (baseScaleDown * 3));
+    } else if (webScraperActive < 500 && waitingAndPriorityCount < 200) {
+      targetMachineCount = Math.max(minNumberOfMachines, activeMachines - (baseScaleDown * 2));
+    } else if (webScraperActive < 1000 && waitingAndPriorityCount < 500) {
+      targetMachineCount = Math.max(minNumberOfMachines, activeMachines - baseScaleDown);
+    }
+
+    if (targetMachineCount !== activeMachines) {
+      Logger.info(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting`);
+
+      if(targetMachineCount > activeMachines) {
+        sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`, false, process.env.SLACK_AUTOSCALER ?? "");
+      } else {
+        sendSlackWebhook(`🐂 Scaling from ${activeMachines} to ${targetMachineCount} - ${webScraperActive} active, ${webScraperWaiting} waiting - Current DateTime: ${new Date().toISOString()}`, false, process.env.SLACK_AUTOSCALER ?? "");
+      }
+      return res.status(200).json({
+        mode: "scale-descale",
+        count: targetMachineCount,
+      });
+    }
+
+    return res.status(200).json({
+      mode: "normal",
+      count: activeMachines,
+    });
+  } catch (error) {
+    Logger.error(error);
+    return res.status(500).send("Failed to initialize autoscaler");
+  }
+}
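The autoscaler above is a simple step controller: three scale-up bands, three scale-down bands (disjoint from the scale-up conditions by construction), and per-branch clamping to the 20–80 machine range. A condensed sketch of just the decision, as a pure function — the helper name and shape are illustrative, not part of this patch:

```ts
// Sketch of the scaling decision in autoscalerController above.
// decideTargetMachineCount is a hypothetical name, not from the patch.
function decideTargetMachineCount(active: number, waitingAndPriority: number, machines: number): number {
  const MIN = 20, MAX = 80, UP = 10, DOWN = 2;
  let target = machines;

  // Scale-up bands, busiest first
  if (active > 9000 || waitingAndPriority > 2000)      target = Math.min(MAX, machines + UP * 3);
  else if (active > 5000 || waitingAndPriority > 1000) target = Math.min(MAX, machines + UP * 2);
  else if (active > 1000 || waitingAndPriority > 500)  target = Math.min(MAX, machines + UP);

  // Scale-down bands, quietest first; cannot overlap the bands above
  if (active < 100 && waitingAndPriority < 50)         target = Math.max(MIN, machines - DOWN * 3);
  else if (active < 500 && waitingAndPriority < 200)   target = Math.max(MIN, machines - DOWN * 2);
  else if (active < 1000 && waitingAndPriority < 500)  target = Math.max(MIN, machines - DOWN);

  return target; // unchanged when load sits between the bands
}
```

Note the asymmetry is deliberate: scale-up steps (10, 20, 30) are much larger than scale-down steps (2, 4, 6), so the fleet grows quickly under load and drains slowly.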
""); + } + return res.status(200).json({ + mode: "scale-descale", + count: targetMachineCount, + }); + } + + return res.status(200).json({ + mode: "normal", + count: activeMachines, + }); + } catch (error) { + Logger.error(error); + return res.status(500).send("Failed to initialize autoscaler"); + } +} diff --git a/apps/api/src/controllers/auth.ts b/apps/api/src/controllers/auth.ts index 5b8a141b..1796acc2 100644 --- a/apps/api/src/controllers/auth.ts +++ b/apps/api/src/controllers/auth.ts @@ -62,8 +62,10 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{ }; } if (!data || data.length === 0) { - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); - Sentry.captureException(error); + if (error) { + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); + Sentry.captureException(error); + } // TODO: change this error code ? return { success: false, @@ -221,7 +223,8 @@ export async function supaAuthenticateUser( rateLimiter = getRateLimiter( RateLimiterMode.Scrape, token, - subscriptionData.plan + subscriptionData.plan, + teamId ); break; case RateLimiterMode.Search: @@ -310,8 +313,8 @@ export async function supaAuthenticateUser( if (error || !data || data.length === 0) { if (error) { Sentry.captureException(error); + Logger.warn(`Error fetching api key: ${error.message} or data is empty`); } - Logger.warn(`Error fetching api key: ${error.message} or data is empty`); return { success: false, error: "Unauthorized: Invalid token", diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 7bd9b373..d2123d82 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -58,6 +58,26 @@ export async function crawlController(req: Request, res: Response) { }; const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions }; + if (Array.isArray(crawlerOptions.includes)) { + for (const x of crawlerOptions.includes) { + try { + new RegExp(x); + } catch (e) { + return res.status(400).json({ error: e.message }); + } + } + } + + if (Array.isArray(crawlerOptions.excludes)) { + for (const x of crawlerOptions.excludes) { + try { + new RegExp(x); + } catch (e) { + return res.status(400).json({ error: e.message }); + } + } + } + const limitCheck = req.body?.crawlerOptions?.limit ?? 1; const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } = await checkTeamCredits(team_id, limitCheck); @@ -73,6 +93,9 @@ export async function crawlController(req: Request, res: Response) { if (!url) { return res.status(400).json({ error: "Url is required" }); } + if (typeof url !== "string") { + return res.status(400).json({ error: "URL must be a string" }); + } try { url = checkAndUpdateURL(url).url; } catch (e) { @@ -88,8 +111,6 @@ export async function crawlController(req: Request, res: Response) { }); } - const mode = req.body.mode ?? "crawl"; - // if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this? // try { // const a = new WebScraperDataProvider(); @@ -144,8 +165,8 @@ export async function crawlController(req: Request, res: Response) { ? 
 
-    if (sitemap !== null) {
-
+
+    if (sitemap !== null && sitemap.length > 0) {
       let jobPriority = 20;
       // If it is over 1000, we need to get the job priority,
       // otherwise we can use the default priority of 20
       if (sitemap.length > 1000) {
         // set base to 21
         jobPriority = await getJobPriority({plan, team_id, basePriority: 21})
       }
-
       const jobs = sitemap.map((x) => {
         const url = x.url;
         const uuid = uuidv4();
@@ -184,7 +204,14 @@ export async function crawlController(req: Request, res: Response) {
         id,
         jobs.map((x) => x.opts.jobId)
       );
-      await getScrapeQueue().addBulk(jobs);
+      if (Sentry.isInitialized()) {
+        for (const job of jobs) {
+          // add with sentry instrumentation
+          await addScrapeJob(job.data as any, {}, job.opts.jobId);
+        }
+      } else {
+        await getScrapeQueue().addBulk(jobs);
+      }
     } else {
       await lockURL(id, sc, url);
diff --git a/apps/api/src/controllers/scrape.ts b/apps/api/src/controllers/scrape.ts
index f33aa058..1ff9a426 100644
--- a/apps/api/src/controllers/scrape.ts
+++ b/apps/api/src/controllers/scrape.ts
@@ -1,4 +1,4 @@
-import { ExtractorOptions, PageOptions } from './../lib/entities';
+ import { ExtractorOptions, PageOptions } from './../lib/entities';
 import { Request, Response } from "express";
 import { billTeam, checkTeamCredits } from "../services/billing/credit_billing";
 import { authenticateUser } from "./auth";
@@ -9,7 +9,7 @@ import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist"; // Import
 import { numTokensFromString } from '../lib/LLM-extraction/helpers';
 import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../lib/default-values';
 import { addScrapeJob } from '../services/queue-jobs';
-import { scrapeQueueEvents } from '../services/queue-service';
+import { getScrapeQueue } from '../services/queue-service';
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from '../lib/logger';
 import { getJobPriority } from '../lib/job-priority';
@@ -52,18 +52,51 @@ export async function scrapeHelper(
   }, {}, jobId, jobPriority);
 
   let doc;
-  try {
-    doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]; //60 seconds timeout
-  } catch (e) {
-    if (e instanceof Error && e.message.startsWith("Job wait")) {
-      return {
-        success: false,
-        error: "Request timed out",
-        returnCode: 408,
+
+  const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
+    try {
+      doc = (await new Promise((resolve, reject) => {
+        const start = Date.now();
+        const int = setInterval(async () => {
+          if (Date.now() >= start + timeout) {
+            clearInterval(int);
+            reject(new Error("Job wait "));
+          } else {
+            const state = await job.getState();
+            if (state === "completed") {
+              clearInterval(int);
+              resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
+            } else if (state === "failed") {
+              clearInterval(int);
+              reject((await getScrapeQueue().getJob(job.id)).failedReason);
+            }
+          }
+        }, 1000);
+      }))[0]
+    } catch (e) {
+      if (e instanceof Error && e.message.startsWith("Job wait")) {
+        span.setAttribute("timedOut", true);
+        return {
+          success: false,
+          error: "Request timed out",
+          returnCode: 408,
+        }
+      } else if (typeof e === "string" && (e.includes("Error generating completions: ") || e.includes("Invalid schema for function") || e.includes("LLM extraction did not match the extraction schema you provided."))) {
+        return {
+          success: false,
+          error: e,
+          returnCode: 500,
+        };
+      } else {
+        throw e;
       }
-    } else {
-      throw e;
     }
+    span.setAttribute("result", JSON.stringify(doc));
+    return null;
+  });
+
+  if (err !== null) {
+    return err;
   }
 
   await job.remove();
@@ -108,6 +141,10 @@ export async function scrapeController(req: Request, res: Response) {
     let timeout = req.body.timeout ?? defaultTimeout;
 
     if (extractorOptions.mode.includes("llm-extraction")) {
+      if (typeof extractorOptions.extractionSchema !== "object" || extractorOptions.extractionSchema === null) {
+        return res.status(400).json({ error: "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified" });
+      }
+
       pageOptions.onlyMainContent = true;
       timeout = req.body.timeout ?? 90000;
     }
@@ -192,6 +229,6 @@ export async function scrapeController(req: Request, res: Response) {
   } catch (error) {
     Sentry.captureException(error);
     Logger.error(error);
-    return res.status(500).json({ error: error.message });
+    return res.status(500).json({ error: typeof error === "string" ? error : (error?.message ?? "Internal Server Error") });
   }
 }
diff --git a/apps/api/src/controllers/search.ts b/apps/api/src/controllers/search.ts
index a8bf3a69..92efe1df 100644
--- a/apps/api/src/controllers/search.ts
+++ b/apps/api/src/controllers/search.ts
@@ -9,9 +9,10 @@ import { search } from "../search";
 import { isUrlBlocked } from "../scraper/WebScraper/utils/blocklist";
 import { v4 as uuidv4 } from "uuid";
 import { Logger } from "../lib/logger";
-import { getScrapeQueue, scrapeQueueEvents } from "../services/queue-service";
 import { getJobPriority } from "../lib/job-priority";
+import { getScrapeQueue } from "../services/queue-service";
 import * as Sentry from "@sentry/node";
+import { addScrapeJob } from "../services/queue-jobs";
 
 export async function searchHelper(
   jobId: string,
@@ -99,10 +100,36 @@ export async function searchHelper(
       }
     };
   })
-
-  const jobs = await getScrapeQueue().addBulk(jobDatas);
 
-  const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
+  let jobs = [];
+  if (Sentry.isInitialized()) {
+    for (const job of jobDatas) {
+      // add with sentry instrumentation
+      jobs.push(await addScrapeJob(job.data as any, {}, job.opts.jobId));
+    }
+  } else {
+    jobs = await getScrapeQueue().addBulk(jobDatas);
+  }
+
+  const docs = (await Promise.all(jobs.map(x => new Promise((resolve, reject) => {
+    const start = Date.now();
+    const int = setInterval(async () => {
+      if (Date.now() >= start + 60000) {
+        clearInterval(int);
+        reject(new Error("Job wait "));
+      } else {
+        const state = await x.getState();
+        if (state === "completed") {
+          clearInterval(int);
+          resolve((await getScrapeQueue().getJob(x.id)).returnvalue);
+        } else if (state === "failed") {
+          clearInterval(int);
+          reject((await getScrapeQueue().getJob(x.id)).failedReason);
+        }
+      }
+    }, 1000);
+  })))).map(x => x[0]);
 
   if (docs.length === 0) {
     return { success: true, error: "No search results found", returnCode: 200 };
@@ -112,7 +139,7 @@ export async function searchHelper(
 
   // make sure doc.content is not empty
   const filteredDocs = docs.filter(
-    (doc: { content?: string }) => doc.content && doc.content.trim().length > 0
+    (doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0
   );
 
   if (filteredDocs.length === 0) {
@@ -191,6 +218,10 @@ export async function searchController(req: Request, res: Response) {
     });
     return res.status(result.returnCode).json(result);
   } catch (error) {
+    if (error instanceof Error && error.message.startsWith("Job wait")) {
+      return res.status(408).json({ error: "Request timed out" });
+    }
+
     Sentry.captureException(error);
     Logger.error(error);
     return res.status(500).json({ error: error.message });
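With `QueueEvents` removed, `scrapeHelper` above and `searchHelper` here inline the same one-second polling loop against job state. A shared helper capturing that pattern might look like this — a sketch with an illustrative name, not code from the patch:

```ts
import { Queue } from "bullmq";

// Hypothetical shared helper for the polling pattern used in scrape.ts and
// search.ts: resolve with the job's return value once BullMQ reports it
// completed, reject on failure, and time out with the same "Job wait "
// sentinel that the controllers match on with startsWith("Job wait").
function waitForJobResult(queue: Queue, jobId: string, timeout: number): Promise<any> {
  const start = Date.now();
  return new Promise((resolve, reject) => {
    const int = setInterval(async () => {
      if (Date.now() >= start + timeout) {
        clearInterval(int);
        return reject(new Error("Job wait "));
      }
      const job = await queue.getJob(jobId);
      if (!job) return; // job not visible yet; try again next tick
      const state = await job.getState();
      if (state === "completed") {
        clearInterval(int);
        resolve(job.returnvalue);
      } else if (state === "failed") {
        clearInterval(int);
        reject(job.failedReason);
      }
    }, 1000);
  });
}
```

Polling trades a little latency (up to one tick) for robustness: unlike `QueueEvents`, it cannot miss a completion event that fired while the listener was disconnected.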
diff --git a/apps/api/src/controllers/status.ts b/apps/api/src/controllers/status.ts
index c3ca906f..362f1f24 100644
--- a/apps/api/src/controllers/status.ts
+++ b/apps/api/src/controllers/status.ts
@@ -1,8 +1,6 @@
 import { Request, Response } from "express";
 import { Logger } from "../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { getJobs } from "./crawl-status";
 import * as Sentry from "@sentry/node";
 
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index 2b68e0f1..4d096894 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -119,6 +119,7 @@ if (cluster.isMaster) {
         waitingJobs,
       });
     } catch (error) {
+      Sentry.captureException(error);
       Logger.error(error);
       return res.status(500).json({ error: error.message });
     }
@@ -170,6 +171,7 @@ if (cluster.isMaster) {
         }, timeout);
       }
     } catch (error) {
+      Sentry.captureException(error);
       Logger.debug(error);
     }
   };
diff --git a/apps/api/src/lib/LLM-extraction/index.ts b/apps/api/src/lib/LLM-extraction/index.ts
index 85a7e995..af8b0bb1 100644
--- a/apps/api/src/lib/LLM-extraction/index.ts
+++ b/apps/api/src/lib/LLM-extraction/index.ts
@@ -46,7 +46,7 @@ export async function generateCompletions(
         return completionResult;
       } catch (error) {
         Logger.error(`Error generating completions: ${error}`);
-        throw new Error(`Error generating completions: ${error.message}`);
+        throw error;
       }
     default:
       throw new Error("Invalid client");
diff --git a/apps/api/src/lib/LLM-extraction/models.ts b/apps/api/src/lib/LLM-extraction/models.ts
index e696a8cd..8ca6bbd4 100644
--- a/apps/api/src/lib/LLM-extraction/models.ts
+++ b/apps/api/src/lib/LLM-extraction/models.ts
@@ -15,7 +15,7 @@ const defaultPrompt =
 function prepareOpenAIDoc(
   document: Document,
   mode: "markdown" | "raw-html"
-): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] {
+): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
 
   let markdown = document.markdown;
 
@@ -27,9 +27,10 @@ function prepareOpenAIDoc(
 
   // Check if the markdown content exists in the document
   if (!extractionTarget) {
-    throw new Error(
-      `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
-    );
+    return null;
+    // throw new Error(
+    //   `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
+    // );
   }
 
@@ -64,7 +65,16 @@ export async function generateOpenAICompletions({
   mode: "markdown" | "raw-html";
 }): Promise<Document> {
   const openai = client as OpenAI;
-  const [content, numTokens] = prepareOpenAIDoc(document, mode);
+  const preparedDoc = prepareOpenAIDoc(document, mode);
+
+  if (preparedDoc === null) {
+    return {
+      ...document,
+      warning: "LLM extraction was not performed since the document's content is empty or missing.",
+    };
+  }
+
+  const [content, numTokens] = preparedDoc;
 
   const completion = await openai.chat.completions.create({
     model,
diff --git a/apps/api/src/lib/html-to-markdown.ts b/apps/api/src/lib/html-to-markdown.ts
index 233da921..002cb7be 100644
--- a/apps/api/src/lib/html-to-markdown.ts
+++ b/apps/api/src/lib/html-to-markdown.ts
@@ -1,5 +1,5 @@
 
-export function parseMarkdown(html: string) {
+export async function parseMarkdown(html: string) {
   var TurndownService = require("turndown");
   var turndownPluginGfm = require('joplin-turndown-plugin-gfm')
 
@@ -21,7 +21,27 @@ export function parseMarkdown(html: string) {
   });
   var gfm = turndownPluginGfm.gfm;
   turndownService.use(gfm);
-  let markdownContent = turndownService.turndown(html);
+
+  let markdownContent = "";
+  const turndownPromise = new Promise<string>((resolve, reject) => {
+    try {
+      const result = turndownService.turndown(html);
+      resolve(result);
+    } catch (error) {
+      reject("Error converting HTML to Markdown: " + error);
+    }
+  });
+
+  const timeoutPromise = new Promise<string>((resolve, reject) => {
+    const timeout = 5000; // Timeout in milliseconds
+    setTimeout(() => reject("Conversion timed out after " + timeout + "ms"), timeout);
+  });
+
+  try {
+    markdownContent = await Promise.race([turndownPromise, timeoutPromise]);
+  } catch (error) {
+    console.error(error);
+    return ""; // Optionally return an empty string or handle the error as needed
+  }
 
   // multiple line links
   let insideLinkContent = false;
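The `parseMarkdown` change above bounds the Turndown conversion with `Promise.race`. A generalized form of that guard, as a sketch (`withTimeout` is an illustrative helper, not part of the patch):

```ts
// Generalized form of the Promise.race guard used in parseMarkdown above.
// Races the supplied promise against a timer that rejects after `ms`.
function withTimeout<T>(work: Promise<T>, ms: number, label = "operation"): Promise<T> {
  const timer = new Promise<never>((_, reject) =>
    setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms)
  );
  return Promise.race([work, timer]);
}
```

One caveat: a `Promise` executor runs synchronously, so in `parseMarkdown` the `turndown()` call completes (or throws) before the timer is even armed; the race as written mainly converts synchronous throws into a rejection, and truly preempting a long conversion would need a worker thread.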
diff --git a/apps/api/src/main/runWebScraper.ts b/apps/api/src/main/runWebScraper.ts
index 2be05bd5..aea7876e 100644
--- a/apps/api/src/main/runWebScraper.ts
+++ b/apps/api/src/main/runWebScraper.ts
@@ -12,7 +12,6 @@ import { Document } from "../lib/entities";
 import { supabase_service } from "../services/supabase";
 import { Logger } from "../lib/logger";
 import { ScrapeEvents } from "../lib/scrape-events";
-import { getScrapeQueue } from "../services/queue-service";
 
 export async function startWebScraperPipeline({
   job,
diff --git a/apps/api/src/routes/admin.ts b/apps/api/src/routes/admin.ts
index 77d1bf46..d32808ce 100644
--- a/apps/api/src/routes/admin.ts
+++ b/apps/api/src/routes/admin.ts
@@ -1,6 +1,7 @@
 import express from "express";
 import { redisHealthController } from "../controllers/admin/redis-health";
 import {
+  autoscalerController,
   checkQueuesController,
   cleanBefore24hCompleteJobsController,
   queuesController,
@@ -27,3 +28,8 @@ adminRouter.get(
   `/admin/${process.env.BULL_AUTH_KEY}/queues`,
   queuesController
 );
+
+adminRouter.get(
+  `/admin/${process.env.BULL_AUTH_KEY}/autoscaler`,
+  autoscalerController
+);
diff --git a/apps/api/src/scraper/WebScraper/crawler.ts b/apps/api/src/scraper/WebScraper/crawler.ts
index 02894cfc..92b9ae40 100644
--- a/apps/api/src/scraper/WebScraper/crawler.ts
+++ b/apps/api/src/scraper/WebScraper/crawler.ts
@@ -53,8 +53,8 @@ export class WebCrawler {
     this.jobId = jobId;
     this.initialUrl = initialUrl;
     this.baseUrl = new URL(initialUrl).origin;
-    this.includes = includes ?? [];
-    this.excludes = excludes ?? [];
+    this.includes = Array.isArray(includes) ? includes : [];
+    this.excludes = Array.isArray(excludes) ? excludes : [];
     this.limit = limit;
     this.robotsTxtUrl = `${this.baseUrl}/robots.txt`;
     this.robots = robotsParser(this.robotsTxtUrl, "");
@@ -108,7 +108,12 @@ export class WebCrawler {
 
     // Normalize the initial URL and the link to account for www and non-www versions
     const normalizedInitialUrl = new URL(this.initialUrl);
-    const normalizedLink = new URL(link);
+    let normalizedLink;
+    try {
+      normalizedLink = new URL(link);
+    } catch (_) {
+      return false;
+    }
 
     const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, '');
     const linkHostname = normalizedLink.hostname.replace(/^www\./, '');
diff --git a/apps/api/src/scraper/WebScraper/index.ts b/apps/api/src/scraper/WebScraper/index.ts
index 65247df1..38d0cc32 100644
--- a/apps/api/src/scraper/WebScraper/index.ts
+++ b/apps/api/src/scraper/WebScraper/index.ts
@@ -16,7 +16,6 @@ import {
   replacePathsWithAbsolutePaths,
 } from "./utils/replacePaths";
 import { generateCompletions } from "../../lib/LLM-extraction";
-import { getScrapeQueue } from "../../../src/services/queue-service";
 import { fetchAndProcessDocx } from "./utils/docxProcessor";
 import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
 import { Logger } from "../../lib/logger";
diff --git a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
index b520bfe2..aa86ad5e 100644
--- a/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
+++ b/apps/api/src/scraper/WebScraper/scrapers/fireEngine.ts
@@ -5,6 +5,7 @@ import { generateRequestParams } from "../single_url";
 import { fetchAndProcessPdf } from "../utils/pdfProcessor";
 import { universalTimeout } from "../global";
 import { Logger } from "../../../lib/logger";
+import * as Sentry from "@sentry/node";
 
 /**
  * Scrapes a URL with Fire-Engine
@@ -92,27 +93,35 @@ export async function scrapWithFireEngine({
     });
 
     const startTime = Date.now();
-    const _response = await axiosInstance.post(
-      process.env.FIRE_ENGINE_BETA_URL + endpoint,
-      {
-        url: url,
-        wait: waitParam,
-        screenshot: screenshotParam,
-        fullPageScreenshot: fullPageScreenshotParam,
-        headers: headers,
-        pageOptions: pageOptions,
-        disableJsDom: pageOptions?.disableJsDom ?? false,
-        priority,
-        engine,
-        instantReturn: true,
-        ...fireEngineOptionsParam,
-      },
-      {
-        headers: {
-          "Content-Type": "application/json",
+    const _response = await Sentry.startSpan({
+      name: "Call to fire-engine"
+    }, async span => {
+      return await axiosInstance.post(
+        process.env.FIRE_ENGINE_BETA_URL + endpoint,
+        {
+          url: url,
+          wait: waitParam,
+          screenshot: screenshotParam,
+          fullPageScreenshot: fullPageScreenshotParam,
+          headers: headers,
+          pageOptions: pageOptions,
+          disableJsDom: pageOptions?.disableJsDom ?? false,
+          priority,
+          engine,
+          instantReturn: true,
+          ...fireEngineOptionsParam,
+        },
+        {
+          headers: {
+            "Content-Type": "application/json",
+            ...(Sentry.isInitialized() ? 
({ + "sentry-trace": Sentry.spanToTraceHeader(span), + "baggage": Sentry.spanToBaggageHeader(span), + }) : {}), + } } - } - ); + ); + }); let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`); while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) { diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index 1f2a62de..6998a665 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -24,8 +24,8 @@ import { clientSideError } from "../../strings"; dotenv.config(); export const baseScrapers = [ - "fire-engine", "fire-engine;chrome-cdp", + "fire-engine", "scrapingBee", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", "scrapingBeeLoad", @@ -85,8 +85,8 @@ function getScrapingFallbackOrder( }); let defaultOrder = [ - !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine", !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine;chrome-cdp", + !process.env.USE_DB_AUTHENTICATION ? undefined : "fire-engine", "scrapingBee", process.env.USE_DB_AUTHENTICATION ? undefined : "playwright", "scrapingBeeLoad", diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts index 42525257..77411b00 100644 --- a/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts +++ b/apps/api/src/scraper/WebScraper/utils/__tests__/blocklist.test.ts @@ -8,7 +8,6 @@ describe('Blocklist Functionality', () => { 'https://twitter.com/home', 'https://instagram.com/explore', 'https://linkedin.com/in/johndoe', - 'https://pinterest.com/pin/create', 'https://snapchat.com/add/johndoe', 'https://tiktok.com/@johndoe', 'https://reddit.com/r/funny', diff --git a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts index c09cc5b3..3d98fedf 100644 --- a/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts +++ b/apps/api/src/scraper/WebScraper/utils/__tests__/socialBlockList.test.ts @@ -8,7 +8,6 @@ describe('isUrlBlocked', () => { 'https://twitter.com/someuser', 'https://instagram.com/someuser', 'https://www.linkedin.com/in/someuser', - 'https://pinterest.com/someuser', 'https://snapchat.com/someuser', 'https://tiktok.com/@someuser', 'https://reddit.com/r/somesubreddit', diff --git a/apps/api/src/scraper/WebScraper/utils/blocklist.ts b/apps/api/src/scraper/WebScraper/utils/blocklist.ts index 7f1602e1..99eb6bd2 100644 --- a/apps/api/src/scraper/WebScraper/utils/blocklist.ts +++ b/apps/api/src/scraper/WebScraper/utils/blocklist.ts @@ -6,7 +6,6 @@ const socialMediaBlocklist = [ 'twitter.com', 'instagram.com', 'linkedin.com', - 'pinterest.com', 'snapchat.com', 'tiktok.com', 'reddit.com', @@ -16,7 +15,8 @@ const socialMediaBlocklist = [ 'wechat.com', 'telegram.org', 'researchhub.com', - 'youtube.com' + 'youtube.com', + 'corterix.com', ]; const allowedKeywords = [ diff --git a/apps/api/src/services/alerts/slack.ts b/apps/api/src/services/alerts/slack.ts index 96bf1c09..0fa75693 100644 --- a/apps/api/src/services/alerts/slack.ts +++ b/apps/api/src/services/alerts/slack.ts @@ -3,9 +3,9 @@ import { Logger } from "../../../src/lib/logger"; export async function sendSlackWebhook( message: string, - alertEveryone: boolean = false + alertEveryone: boolean = false, + webhookUrl: string = 
process.env.SLACK_WEBHOOK_URL ?? ""
 ) {
-  const webhookUrl = process.env.SLACK_WEBHOOK_URL;
   const messagePrefix = alertEveryone ? "<!channel> " : "";
   const payload = {
     text: `${messagePrefix} ${message}`,
diff --git a/apps/api/src/services/billing/credit_billing.ts b/apps/api/src/services/billing/credit_billing.ts
index 2ad07318..19c17b48 100644
--- a/apps/api/src/services/billing/credit_billing.ts
+++ b/apps/api/src/services/billing/credit_billing.ts
@@ -317,21 +317,21 @@ export async function supaCheckTeamCredits(team_id: string, credits: number) {
 
   // Compare the adjusted total credits used with the credits allowed by the plan
   if (adjustedCreditsUsed + credits > price.credits) {
-    await sendNotification(
-      team_id,
-      NotificationType.LIMIT_REACHED,
-      subscription.current_period_start,
-      subscription.current_period_end
-    );
+    // await sendNotification(
+    //   team_id,
+    //   NotificationType.LIMIT_REACHED,
+    //   subscription.current_period_start,
+    //   subscription.current_period_end
+    // );
     return { success: false, message: "Insufficient credits, please upgrade!", remainingCredits: creditLimit - adjustedCreditsUsed };
   } else if (creditUsagePercentage >= 0.8) {
     // Send email notification for approaching credit limit
-    await sendNotification(
-      team_id,
-      NotificationType.APPROACHING_LIMIT,
-      subscription.current_period_start,
-      subscription.current_period_end
-    );
+    // await sendNotification(
+    //   team_id,
+    //   NotificationType.APPROACHING_LIMIT,
+    //   subscription.current_period_start,
+    //   subscription.current_period_end
+    // );
   }
 
   return { success: true, message: "Sufficient credits available", remainingCredits: creditLimit - adjustedCreditsUsed };
diff --git a/apps/api/src/services/queue-jobs.ts b/apps/api/src/services/queue-jobs.ts
index eb7e4727..740f48a2 100644
--- a/apps/api/src/services/queue-jobs.ts
+++ b/apps/api/src/services/queue-jobs.ts
@@ -2,6 +2,20 @@ import { Job, Queue } from "bullmq";
 import { getScrapeQueue } from "./queue-service";
 import { v4 as uuidv4 } from "uuid";
 import { WebScraperOptions } from "../types";
+import * as Sentry from "@sentry/node";
+
+async function addScrapeJobRaw(
+  webScraperOptions: any,
+  options: any,
+  jobId: string,
+  jobPriority: number = 10
+): Promise<Job> {
+  return await getScrapeQueue().add(jobId, webScraperOptions, {
+    ...options,
+    priority: jobPriority,
+    jobId,
+  });
+}
 
 export async function addScrapeJob(
   webScraperOptions: WebScraperOptions,
@@ -9,11 +23,30 @@ export async function addScrapeJob(
   jobId: string = uuidv4(),
   jobPriority: number = 10
 ): Promise<Job> {
-  return await getScrapeQueue().add(jobId, webScraperOptions, {
-    priority: jobPriority,
-    ...options,
-    jobId,
-  });
+
+  if (Sentry.isInitialized()) {
+    const size = JSON.stringify(webScraperOptions).length;
+    return await Sentry.startSpan({
+      name: "Add scrape job",
+      op: "queue.publish",
+      attributes: {
+        "messaging.message.id": jobId,
+        "messaging.destination.name": getScrapeQueue().name,
+        "messaging.message.body.size": size,
+      },
+    }, async (span) => {
+      return await addScrapeJobRaw({
+        ...webScraperOptions,
+        sentry: {
+          trace: Sentry.spanToTraceHeader(span),
+          baggage: Sentry.spanToBaggageHeader(span),
+          size,
+        },
+      }, options, jobId, jobPriority);
+    });
+  } else {
+    return await addScrapeJobRaw(webScraperOptions, options, jobId, jobPriority);
+  }
 }
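`addScrapeJob` above smuggles the Sentry trace context into the job payload itself, and the worker (in `queue-worker.ts` below) resumes the trace with `continueTrace`. The two halves of that handshake, reduced to a sketch (helper names are illustrative, not from the patch):

```ts
import * as Sentry from "@sentry/node";

// Producer side (mirrors addScrapeJob): stash the current span's trace
// headers inside the payload that rides through Redis.
function withTraceContext<T extends object>(payload: T, span: Sentry.Span) {
  return {
    ...payload,
    sentry: {
      trace: Sentry.spanToTraceHeader(span),
      baggage: Sentry.spanToBaggageHeader(span),
    },
  };
}

// Consumer side (mirrors the worker below): resume the producer's trace
// before opening the processing span, so both ends land in one trace.
function processWithTrace(jobData: any, run: () => Promise<void>) {
  return Sentry.continueTrace(
    { sentryTrace: jobData.sentry?.trace, baggage: jobData.sentry?.baggage },
    () => Sentry.startSpan({ name: "Scrape job" }, run)
  );
}
```

Since BullMQ carries only serialized payloads, embedding the headers in the job data is the standard way to keep producer and consumer in the same distributed trace.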
diff --git a/apps/api/src/services/queue-service.ts b/apps/api/src/services/queue-service.ts
index b13489a6..113b3fa3 100644
--- a/apps/api/src/services/queue-service.ts
+++ b/apps/api/src/services/queue-service.ts
@@ -35,6 +35,6 @@ export function getScrapeQueue() {
   }
 }
 
-import { QueueEvents } from 'bullmq';
-
-export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection });
\ No newline at end of file
+// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
+// import { QueueEvents } from 'bullmq';
+// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });
\ No newline at end of file
diff --git a/apps/api/src/services/queue-worker.ts b/apps/api/src/services/queue-worker.ts
index 1a61e02c..ca1a4cbd 100644
--- a/apps/api/src/services/queue-worker.ts
+++ b/apps/api/src/services/queue-worker.ts
@@ -53,6 +53,7 @@ const processJobInternal = async (token: string, job: Job) => {
   }, jobLockExtendInterval);
 
   await addJobPriority(job.data.team_id, job.id );
+  let err = null;
   try {
     const result = await processJob(job, token);
     try{
@@ -65,11 +66,15 @@ const processJobInternal = async (token: string, job: Job) => {
     }
   } catch (error) {
     console.log("Job failed, error:", error);
+    Sentry.captureException(error);
+    err = error;
     await job.moveToFailed(error, token, false);
   } finally {
     await deleteJobPriority(job.data.team_id, job.id );
     clearInterval(extendLockInterval);
   }
+
+  return err;
 };
 
 let isShuttingDown = false;
@@ -79,7 +84,7 @@ process.on("SIGINT", () => {
   isShuttingDown = true;
 });
 
-const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<void>) => {
+const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<any>) => {
   const worker = new Worker(queueName, null, {
     connection: redisConnection,
     lockDuration: 1 * 60 * 1000, // 1 minute
@@ -107,16 +112,47 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
     const job = await worker.getNextJob(token);
     if (job) {
-      Sentry.startSpan({
-        name: "Scrape job",
-        op: "bullmq.job",
-        attributes: {
-          job: job.id,
-          worker: process.env.FLY_MACHINE_ID ?? worker.id,
-        },
-      }, async () => {
-        await processJobInternal(token, job);
-      });
+      if (job.data && job.data.sentry && Sentry.isInitialized()) {
+        Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
+          Sentry.startSpan({
+            name: "Scrape job",
+            attributes: {
+              job: job.id,
+              worker: process.env.FLY_MACHINE_ID ?? worker.id,
+            },
+          }, async (span) => {
+            await Sentry.startSpan({
+              name: "Process scrape job",
+              op: "queue.process",
+              attributes: {
+                "messaging.message.id": job.id,
+                "messaging.destination.name": getScrapeQueue().name,
+                "messaging.message.body.size": job.data.sentry.size,
+                "messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
+                "messaging.message.retry.count": job.attemptsMade,
+              }
+            }, async () => {
+              const res = await processJobInternal(token, job);
+              if (res !== null) {
+                span.setStatus({ code: 2 }); // ERROR
+              } else {
+                span.setStatus({ code: 1 }); // OK
+              }
+            });
+          });
+        });
+      } else {
+        Sentry.startSpan({
+          name: "Scrape job",
+          attributes: {
+            job: job.id,
+            worker: process.env.FLY_MACHINE_ID ?? 
worker.id, + }, + }, () => { + processJobInternal(token, job); + }); + } + await sleep(gotJobInterval); } else { await sleep(connectionMonitorInterval); diff --git a/apps/api/src/services/rate-limiter.ts b/apps/api/src/services/rate-limiter.ts index 05fb102c..8e4e9fc9 100644 --- a/apps/api/src/services/rate-limiter.ts +++ b/apps/api/src/services/rate-limiter.ts @@ -17,10 +17,23 @@ const RATE_LIMITS = { growthdouble: 50, }, scrape: { + default: 20, + free: 10, + starter: 20, + standard: 100, + standardOld: 40, + scale: 500, + hobby: 20, + standardNew: 100, + standardnew: 100, + growth: 1000, + growthdouble: 1000, + }, + search: { default: 20, free: 5, starter: 20, - standard: 50, + standard: 40, standardOld: 40, scale: 500, hobby: 10, @@ -29,7 +42,7 @@ const RATE_LIMITS = { growth: 500, growthdouble: 500, }, - search: { + map:{ default: 20, free: 5, starter: 20, @@ -84,16 +97,28 @@ export const testSuiteRateLimiter = new RateLimiterRedis({ duration: 60, // Duration in seconds }); +export const devBRateLimiter = new RateLimiterRedis({ + storeClient: redisRateLimitClient, + keyPrefix: "dev-b", + points: 1200, + duration: 60, // Duration in seconds +}); + export function getRateLimiter( mode: RateLimiterMode, token: string, - plan?: string + plan?: string, + teamId?: string ) { - if (token.includes("a01ccae") || token.includes("6254cf9")) { + if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673") || token.includes("23befa1b")) { return testSuiteRateLimiter; } + if(teamId === process.env.DEV_B_TEAM_ID) { + return devBRateLimiter; + } + const rateLimitConfig = RATE_LIMITS[mode]; // {default : 5} if (!rateLimitConfig) return serverRateLimiter; diff --git a/apps/api/src/services/sentry.ts b/apps/api/src/services/sentry.ts index 1292773a..176d3d4b 100644 --- a/apps/api/src/services/sentry.ts +++ b/apps/api/src/services/sentry.ts @@ -10,8 +10,9 @@ if (process.env.SENTRY_DSN) { integrations: [ nodeProfilingIntegration(), ], - tracesSampleRate: 0.045, + tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045, profilesSampleRate: 1.0, serverName: process.env.FLY_MACHINE_ID, + environment: process.env.SENTRY_ENVIRONMENT ?? "production", }); } diff --git a/apps/redis/.dockerignore b/apps/redis/.dockerignore new file mode 100644 index 00000000..860aa7ad --- /dev/null +++ b/apps/redis/.dockerignore @@ -0,0 +1,2 @@ +.git +fly.toml diff --git a/apps/redis/Dockerfile b/apps/redis/Dockerfile new file mode 100644 index 00000000..77ea66ae --- /dev/null +++ b/apps/redis/Dockerfile @@ -0,0 +1,6 @@ +ARG REDIS_VERSION=7.2.5 +FROM bitnami/redis:${REDIS_VERSION} + +COPY start-redis-server.sh /usr/bin/start-redis-server.sh + +CMD ["/usr/bin/start-redis-server.sh"] diff --git a/apps/redis/Procfile b/apps/redis/Procfile new file mode 100644 index 00000000..8f661345 --- /dev/null +++ b/apps/redis/Procfile @@ -0,0 +1,2 @@ +redis: /usr/bin/start-redis-server.sh +metrics: /usr/local/bin/redis_exporter -redis.addr localhost:6379 -web.listen-address ":9091" diff --git a/apps/redis/README.md b/apps/redis/README.md new file mode 100644 index 00000000..7d2bcabd --- /dev/null +++ b/apps/redis/README.md @@ -0,0 +1,48 @@ +The official repository for Running Redis on Fly.io. Find the accompanying Docker image at [flyio/redis](https://hub.docker.com/repository/docker/flyio/redis). + +## Usage + +This installation requires setting a password on Redis. To do that, run `fly secrets set REDIS_PASSWORD=mypassword` before deploying. 
Keep
+track of this password - it won't be visible again after deployment!
+
+If you need no customizations, you can deploy using the official Docker image. See `fly.toml` in this repository for an example to get started with.
+
+## Runtime requirements
+
+By default, this Redis installation will only accept connections on the private IPv6 network, on the standard port 6379.
+
+If you want to access it from the public internet, add a `[[services]]` section to your `fly.toml`. An example is included in this repo for accessing Redis on port 10000.
+
+We recommend adding persistent storage for Redis data. If you skip this step, data will be lost across deploys or restarts. For Fly apps, the volume needs to be in the same region as the app instances. For example:
+
+```cmd
+flyctl volumes create redis_server --region ord
+```
+```out
+        Name: redis_server
+      Region: ord
+     Size GB: 10
+  Created at: 02 Nov 20 19:55 UTC
+```
+
+To connect this volume to the app, `fly.toml` includes a `[mounts]` entry.
+
+```
+[mounts]
+source = "redis_server"
+destination = "/data"
+```
+
+When the app starts, that volume will be mounted on /data.
+
+## Cutting a release
+
+If you have write access to this repo, you can ship a prerelease or full release with:
+
+```
+scripts/bump_version.sh
+```
+or
+```
+scripts/bump_version.sh prerel
+```
diff --git a/apps/dragonfly/fly.toml b/apps/redis/fly.toml
similarity index 60%
rename from apps/dragonfly/fly.toml
rename to apps/redis/fly.toml
index 14bdbd96..1bcd05fb 100644
--- a/apps/dragonfly/fly.toml
+++ b/apps/redis/fly.toml
@@ -1,13 +1,8 @@
 app = 'firecrawl-dragonfly'
 primary_region = 'iad'
 
-[experimental]
-  cmd = ['dragonfly','--logtostderr', '--cluster_mode=emulated', '--lock_on_hashtags', "--bind","::"]
-
-[build]
-  image = 'ghcr.io/dragonflydb/dragonfly'
-
 [[mounts]]
-  source = 'firecrawl_dragonfly'
+  source = 'firecrawl_redis'
   destination = '/data'
 
 [[services]]
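Given the defaults documented above (private IPv6 network, port 6379, `REDIS_PASSWORD`), a client inside the same Fly organization might connect like this — a minimal sketch assuming the ioredis client and a Fly-internal DNS name, neither of which is defined in this repo:

```ts
import Redis from "ioredis";

// Hypothetical connection matching the README defaults. The ".internal"
// hostname is an assumed Fly private-DNS name for the app above.
const redis = new Redis({
  host: "firecrawl-dragonfly.internal",
  port: 6379,
  password: process.env.REDIS_PASSWORD,
  family: 6, // Fly private networking resolves .internal names to IPv6
});
```

The `family: 6` option matters: without it, ioredis resolves hostnames to IPv4 first and never finds the IPv6-only private address.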
+ echo "You probably want to exit, run 'git pull', then release." + echo + read -p "Release anyway? " -n 1 -r + echo + if [[ $REPLY =~ ^[^Yy]$ ]]; then + echo Aborting. + exit 1 + fi +fi + +dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +previous_version="$("$dir"/../scripts/version.sh -s)" + +if [[ $prerel == "prerel" ]]; then + prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version") + if [[ $prerelversion == "" ]]; then + new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version") + new_version=$("$dir"/../scripts/semver bump prerel pre-1 "$new_version") + else + prerel=pre-$((${prerelversion#pre-} + 1)) + new_version=$("$dir"/../scripts/semver bump prerel "$prerel" "$previous_version") + fi +else + prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version") + if [[ $prerelversion == "" ]]; then + new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version") + else + new_version=${previous_version//-$prerelversion/} + fi +fi + +new_version="v$new_version" + +echo "Bumping version from v${previous_version} to ${new_version}" + +read -p "Are you sure? " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]] +then + git tag -m "release ${new_version}" -a "$new_version" && git push "${ORIGIN}" tag "$new_version" + echo "done" +fi diff --git a/apps/redis/scripts/semver b/apps/redis/scripts/semver new file mode 100755 index 00000000..674229e0 --- /dev/null +++ b/apps/redis/scripts/semver @@ -0,0 +1,200 @@ +#!/usr/bin/env bash + +set -o errexit -o nounset -o pipefail + +SEMVER_REGEX="^[vV]?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\-[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?(\\+[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?$" + +PROG=semver +PROG_VERSION=2.1.0 + +USAGE="\ +Usage: + $PROG bump (major|minor|patch|release|prerel |build ) + $PROG compare + $PROG get (major|minor|patch|release|prerel|build) + $PROG --help + $PROG --version + +Arguments: + A version must match the following regex pattern: + \"${SEMVER_REGEX}\". + In english, the version must match X.Y.Z(-PRERELEASE)(+BUILD) + where X, Y and Z are positive integers, PRERELEASE is an optional + string composed of alphanumeric characters and hyphens and + BUILD is also an optional string composed of alphanumeric + characters and hyphens. + + See definition. + + String that must be composed of alphanumeric characters and hyphens. + + String that must be composed of alphanumeric characters and hyphens. + +Options: + -v, --version Print the version of this tool. + -h, --help Print this help message. + +Commands: + bump Bump by one of major, minor, patch, prerel, build + or a forced potentially conflicting version. The bumped version is + shown to stdout. + + compare Compare with , output to stdout the + following values: -1 if is newer, 0 if equal, 1 if + older. + + get Extract given part of , where part is one of major, minor, + patch, prerel, build." 
+ +function error { + echo -e "$1" >&2 + exit 1 +} + +function usage-help { + error "$USAGE" +} + +function usage-version { + echo -e "${PROG}: $PROG_VERSION" + exit 0 +} + +function validate-version { + local version=$1 + if [[ "$version" =~ $SEMVER_REGEX ]]; then + # if a second argument is passed, store the result in var named by $2 + if [ "$#" -eq "2" ]; then + local major=${BASH_REMATCH[1]} + local minor=${BASH_REMATCH[2]} + local patch=${BASH_REMATCH[3]} + local prere=${BASH_REMATCH[4]} + local build=${BASH_REMATCH[6]} + eval "$2=(\"$major\" \"$minor\" \"$patch\" \"$prere\" \"$build\")" + else + echo "$version" + fi + else + error "version $version does not match the semver scheme 'X.Y.Z(-PRERELEASE)(+BUILD)'. See help for more information." + fi +} + +function compare-version { + validate-version "$1" V + validate-version "$2" V_ + + # MAJOR, MINOR and PATCH should compare numerically + for i in 0 1 2; do + local diff=$((${V[$i]} - ${V_[$i]})) + if [[ $diff -lt 0 ]]; then + echo -1; return 0 + elif [[ $diff -gt 0 ]]; then + echo 1; return 0 + fi + done + + # PREREL should compare with the ASCII order. + if [[ -z "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then + echo 1; return 0; + elif [[ -n "${V[3]}" ]] && [[ -z "${V_[3]}" ]]; then + echo -1; return 0; + elif [[ -n "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then + if [[ "${V[3]}" > "${V_[3]}" ]]; then + echo 1; return 0; + elif [[ "${V[3]}" < "${V_[3]}" ]]; then + echo -1; return 0; + fi + fi + + echo 0 +} + +function command-bump { + local new; local version; local sub_version; local command; + + case $# in + 2) case $1 in + major|minor|patch|release) command=$1; version=$2;; + *) usage-help;; + esac ;; + 3) case $1 in + prerel|build) command=$1; sub_version=$2 version=$3 ;; + *) usage-help;; + esac ;; + *) usage-help;; + esac + + validate-version "$version" parts + # shellcheck disable=SC2154 + local major="${parts[0]}" + local minor="${parts[1]}" + local patch="${parts[2]}" + local prere="${parts[3]}" + local build="${parts[4]}" + + case "$command" in + major) new="$((major + 1)).0.0";; + minor) new="${major}.$((minor + 1)).0";; + patch) new="${major}.${minor}.$((patch + 1))";; + release) new="${major}.${minor}.${patch}";; + prerel) new=$(validate-version "${major}.${minor}.${patch}-${sub_version}");; + build) new=$(validate-version "${major}.${minor}.${patch}${prere}+${sub_version}");; + *) usage-help ;; + esac + + echo "$new" + exit 0 +} + +function command-compare { + local v; local v_; + + case $# in + 2) v=$(validate-version "$1"); v_=$(validate-version "$2") ;; + *) usage-help ;; + esac + + compare-version "$v" "$v_" + exit 0 +} + + +# shellcheck disable=SC2034 +function command-get { + local part version + + if [[ "$#" -ne "2" ]] || [[ -z "$1" ]] || [[ -z "$2" ]]; then + usage-help + exit 0 + fi + + part="$1" + version="$2" + + validate-version "$version" parts + local major="${parts[0]}" + local minor="${parts[1]}" + local patch="${parts[2]}" + local prerel="${parts[3]:1}" + local build="${parts[4]:1}" + + case "$part" in + major|minor|patch|release|prerel|build) echo "${!part}" ;; + *) usage-help ;; + esac + + exit 0 +} + +case $# in + 0) echo "Unknown command: $*"; usage-help;; +esac + +case $1 in + --help|-h) echo -e "$USAGE"; exit 0;; + --version|-v) usage-version ;; + bump) shift; command-bump "$@";; + get) shift; command-get "$@";; + compare) shift; command-compare "$@";; + *) echo "Unknown arguments: $*"; usage-help;; +esac diff --git a/apps/redis/scripts/version.sh b/apps/redis/scripts/version.sh new file mode 100755 index 
00000000..0d3d9875 --- /dev/null +++ b/apps/redis/scripts/version.sh @@ -0,0 +1,5 @@ +ORIGIN=${ORIGIN:-origin} + +version=$(git fetch --tags "${ORIGIN}" &>/dev/null | git -c "versionsort.prereleasesuffix=-pre" tag -l --sort=version:refname | grep -v dev | grep -vE '^v2$' | grep -vE '^v1$' | tail -n1 | cut -c 2-) + +echo "$version" diff --git a/apps/redis/start-redis-server.sh b/apps/redis/start-redis-server.sh new file mode 100755 index 00000000..ed252fde --- /dev/null +++ b/apps/redis/start-redis-server.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -e + +sysctl vm.overcommit_memory=1 || true +sysctl net.core.somaxconn=1024 || true + +PW_ARG="" +if [[ ! -z "${REDIS_PASSWORD}" ]]; then + PW_ARG="--requirepass $REDIS_PASSWORD" +fi + +# Set maxmemory-policy to 'allkeys-lru' for caching servers that should always evict old keys +: ${MAXMEMORY_POLICY:="volatile-lru"} +: ${APPENDONLY:="no"} +: ${FLY_VM_MEMORY_MB:=512} +if [ "${NOSAVE}" = "" ] ; then + : ${SAVE:="3600 1 300 100 60 10000"} +fi +# Set maxmemory to 10% of available memory +MAXMEMORY=$(($FLY_VM_MEMORY_MB*80/100)) + +mkdir /data/redis + +redis-server $PW_ARG \ + --dir /data/redis \ + --maxmemory "${MAXMEMORY}mb" \ + --maxmemory-policy $MAXMEMORY_POLICY \ + --appendonly $APPENDONLY \ + --save "$SAVE" diff --git a/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb b/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb new file mode 100644 index 00000000..d6168878 --- /dev/null +++ b/examples/find_internal_link_opportunites/find_internal_link_opportunites.ipynb @@ -0,0 +1,509 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import datetime\n", + "import time\n", + "from firecrawl import FirecrawlApp\n", + "import json\n", + "import anthropic\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables\n", + "load_dotenv()\n", + "\n", + "# Retrieve API keys from environment variables\n", + "anthropic_api_key = os.getenv(\"ANTHROPIC_API_KEY\") or \"\"\n", + "firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\") or \"\"\n", + "# Set variables\n", + "blog_url=\"https://mendable.ai/blog\"\n", + "\n", + "# Set up anthropic client\n", + "client = anthropic.Anthropic(\n", + " api_key=anthropic_api_key,\n", + ")\n", + "\n", + "# Initialize the FirecrawlApp with your API key\n", + "app = FirecrawlApp(api_key=firecrawl_api_key)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Crawl a website\n", + "params = {\n", + " 'crawlOptions': {\n", + " 'limit': 100\n", + " },\n", + " \"pageOptions\": {\n", + " \"onlyMainContent\": True\n", + " }\n", + "}\n", + "crawl_result = app.crawl_url(blog_url, params=params)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting potential links from crawl_result:\n", + "Collected 36 potential links:\n", + "URL: https://mendable.ai/blog/coachgtm-mongodb, Title: Meet MongoDBs CoachGTM.ai\n", + "URL: https://mendable.ai/blog/building-safe-rag, Title: Building Safe RAG systems with the LLM OWASP top 10\n", + "URL: https://mendable.ai/blog/gdpr-repository-pattern, Title: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n", + "URL: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions, Title: How Mendable leverages Langsmith to debug Tools 
& Actions\n",
+ "URL: https://mendable.ai/blog/european-data-storage, Title: Launching European Data Storage powered by MongoDB\n",
+ "URL: https://mendable.ai/blog/tools, Title: Introducing Tools and Actions\n",
+ "URL: https://mendable.ai/blog/december_update, Title: Mendable.ai December Recap\n",
+ "URL: https://mendable.ai/blog/november_update, Title: Mendable.ai November Update\n",
+ "URL: https://mendable.ai/blog/october-recap, Title: Mendable.ai October Recap\n",
+ "URL: https://mendable.ai/blog/midseptemberupdate, Title: Mendable.ai Mid September 2023 Update\n",
+ "URL: https://mendable.ai/blog/getting-started, Title: Everything you need to know about Mendable: Build and deploy AI Chat Search\n",
+ "URL: https://mendable.ai/blog/building-copilots, Title: Building context-aware AI copilots with Mendable\n",
+ "URL: https://mendable.ai/blog/august2023update, Title: Mendable.ai August 2023 Updates\n",
+ "URL: https://mendable.ai/blog/finetuning-gpt35, Title: Early Insights Fine-Tuning GPT 3.5 from Mendable.ai\n",
+ "URL: https://mendable.ai/blog/gpt35prompting, Title: Improving GPT-3.5, Insights from Mendable.ai\n",
+ "URL: https://mendable.ai/blog/precisemode, Title: Introducing Precise Mode for Mendable.ai\n",
+ "URL: https://mendable.ai/blog/customprompt, Title: Customizing Your LLM Model on Mendable.ai\n",
+ "URL: https://mendable.ai/blog/mendable-launch, Title: Introducing Mendable.ai\n",
+ "URL: https://mendable.ai/blog/european-data-storage, Title: Launching European Data Storage powered by MongoDB\n",
+ "URL: https://mendable.ai/blog/customprompt, Title: Customizing Your LLM Model on Mendable.ai\n",
+ "URL: https://mendable.ai/blog/precisemode, Title: Introducing Precise Mode for Mendable.ai\n",
+ "URL: https://mendable.ai/blog/building-copilots, Title: Building context-aware AI copilots with Mendable\n",
+ "URL: https://mendable.ai/blog/coachgtm-mongodb, Title: Meet MongoDBs CoachGTM.ai\n",
+ "URL: https://mendable.ai/blog/building-safe-rag, Title: Building Safe RAG systems with the LLM OWASP top 10\n",
+ "URL: https://mendable.ai/blog/gdpr-repository-pattern, Title: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n",
+ "URL: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions, Title: How Mendable leverages Langsmith to debug Tools & Actions\n",
+ "URL: https://mendable.ai/blog/tools, Title: Introducing Tools and Actions\n",
+ "URL: https://mendable.ai/blog/december_update, Title: Mendable.ai December Recap\n",
+ "URL: https://mendable.ai/blog/november_update, Title: Mendable.ai November Update\n",
+ "URL: https://mendable.ai/blog/october-recap, Title: Mendable.ai October Recap\n",
+ "URL: https://mendable.ai/blog/midseptemberupdate, Title: Mendable.ai Mid September 2023 Update\n",
+ "URL: https://mendable.ai/blog/getting-started, Title: Everything you need to know about Mendable: Build and deploy AI Chat Search\n",
+ "URL: https://mendable.ai/blog/august2023update, Title: Mendable.ai August 2023 Updates\n",
+ "URL: https://mendable.ai/blog/finetuning-gpt35, Title: Early Insights Fine-Tuning GPT 3.5 from Mendable.ai\n",
+ "URL: https://mendable.ai/blog/gpt35prompting, Title: Improving GPT-3.5, Insights from Mendable.ai\n",
+ "URL: https://mendable.ai/blog/mendable-launch, Title: Introducing Mendable.ai\n"
+ ]
+ }
+ ],
+ "source": [
+ "potential_links = []\n",
+ "\n",
+ "if crawl_result:\n",
+ "    print(\"Collecting potential links from crawl_result:\")\n",
+ "    \n",
+ "    for item in crawl_result:\n",
+ "        metadata = item[\"metadata\"]\n",
+ "        og_url = metadata.get(\"ogUrl\")\n",
+ "        title = metadata.get(\"title\")\n",
+ "        if og_url and title and og_url != blog_url:\n",
+ "            potential_links.append({\"url\": og_url, \"title\": title})\n",
+ "    \n",
+ "    print(f\"Collected {len(potential_links)} potential links:\")\n",
+ "    for link in potential_links:\n",
+ "        print(f\"URL: {link['url']}, Title: {link['title']}\")\n",
+ "    \n",
+ "else:\n",
+ "    print(\"crawl_result is empty or None\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Suggestion for: Meet MongoDBs CoachGTM.ai\n",
+ "Blog phrase: Mendable also provides a Tools\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Meet MongoDBs CoachGTM.ai\n",
+ "Blog phrase: MongoDB Atlas Vector Search to\n",
+ "Internal Link: https://mendable.ai/blog/european-data-storage\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Meet MongoDBs CoachGTM.ai\n",
+ "Blog phrase: By harnessing the power of\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n",
+ "Blog phrase: Advantages of RAG\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n",
+ "Blog phrase: Bring Your Model\n",
+ "Internal Link: https://mendable.ai/blog/customprompt\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building Safe RAG systems with the LLM OWASP top 10\n",
+ "Blog phrase: Garbage in, Garbage out\n",
+ "Internal Link: https://mendable.ai/blog/precisemode\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n",
+ "Blog phrase: European data storage\n",
+ "Internal Link: https://mendable.ai/blog/european-data-storage\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Navigating the Maze of GDPR Compliance: A Codebase Transformation\n",
+ "Blog phrase: delivering value\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: How Mendable leverages Langsmith to debug Tools & Actions\n",
+ "Blog phrase: introduction of Tools & Actions\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: How Mendable leverages Langsmith to debug Tools & Actions\n",
+ "Blog phrase: Mendable Tools & Actions\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Launching European Data Storage powered by MongoDB\n",
+ "Blog phrase: Clean Architecture and Repository pattern\n",
+ "Internal Link: https://mendable.ai/blog/gdpr-repository-pattern\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Launching European Data Storage powered by MongoDB\n",
+ "Blog phrase: building the best AI Chat\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Launching European Data Storage powered by MongoDB\n",
+ "Blog phrase: European RAG pipeline, powered by\n",
+ "Internal Link: https://mendable.ai/blog/building-safe-rag\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Introducing Tools and Actions\n",
+ "Blog phrase: augmentation and actions for automation\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Introducing Tools and Actions\n",
+ "Blog phrase: Mendable provides an API request\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Introducing Tools and Actions\n",
+ "Blog phrase: AI use it when it\n",
+ "Internal Link: https://mendable.ai/blog/how-mendable-leverages-langsmith-to-debug-tools-and-actions\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai December Recap\n",
+ "Blog phrase: customizing the model\n",
+ "Internal Link: https://mendable.ai/blog/customprompt\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai December Recap\n",
+ "Blog phrase: AI sales copilot\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai December Recap\n",
+ "Blog phrase: Introducing Tools and Actions\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai November Update\n",
+ "Blog phrase: Auto syncing data sources\n",
+ "Internal Link: https://mendable.ai/blog/european-data-storage\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai November Update\n",
+ "Blog phrase: Chat insights feature\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai November Update\n",
+ "Blog phrase: Github private repo support\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai October Recap\n",
+ "Blog phrase: Full Prompt Customization\n",
+ "Internal Link: https://mendable.ai/blog/customprompt\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai October Recap\n",
+ "Blog phrase: Expanded Model Support\n",
+ "Internal Link: https://mendable.ai/blog/gpt35prompting\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai October Recap\n",
+ "Blog phrase: AI-Powered Documentation Management\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai Mid September 2023 Update\n",
+ "Blog phrase: new integration templates\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai Mid September 2023 Update\n",
+ "Blog phrase: Product Copilot feature\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai Mid September 2023 Update\n",
+ "Blog phrase: Data Exporting\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n",
+ "Blog phrase: robust API\n",
+ "Internal Link: https://mendable.ai/blog/tools\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n",
+ "Blog phrase: pre-built components\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Everything you need to know about Mendable: Build and deploy AI Chat Search\n",
+ "Blog phrase: Customizing Your LLM Model\n",
+ "Internal Link: https://mendable.ai/blog/customprompt\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building context-aware AI copilots with Mendable\n",
+ "Blog phrase: registered on our platform\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building context-aware AI copilots with Mendable\n",
+ "Blog phrase: dynamic context to the AI\n",
+ "Internal Link: https://mendable.ai/blog/customprompt\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Building context-aware AI copilots with Mendable\n",
+ "Blog phrase: personalized answers to your users\n",
+ "Internal Link: https://mendable.ai/blog/precisemode\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai August 2023 Updates\n",
+ "Blog phrase: Learn more about how to\n",
+ "Internal Link: https://mendable.ai/blog/precisemode\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai August 2023 Updates\n",
+ "Blog phrase: Building context-aware AI copilots with\n",
+ "Internal Link: https://mendable.ai/blog/building-copilots\n",
+ "---\n",
+ "\n",
+ "\n",
+ "Suggestion for: Mendable.ai August 2023 Updates\n",
+ "Blog phrase: customizable AI chat components\n",
+ "Internal Link: https://mendable.ai/blog/getting-started\n",
+ "---\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "ename": "KeyboardInterrupt",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[15], line 57\u001b[0m\n\u001b[0;32m---> 57\u001b[0m message \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mmessages\u001b[38;5;241m.\u001b[39mcreate(\n",
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ ]
+ }
+ ],
+ "source": [
+ "import json\n",
+ "import csv\n",
+ "\n",
+ "# Assuming we have the following variables from the previous code:\n",
+ "# crawl_result, client, potential_links, blog_url\n",
+ "\n",
+ "# Convert potential_links to a JSON string\n",
+ "potential_links_json = json.dumps(potential_links, indent=2)\n",
+ "\n",
+ "# Prepare CSV file\n",
+ "csv_filename = \"link_suggestions.csv\"\n",
+ "csv_headers = [\"Source Blog Title\", \"Source Blog URL\", \"Target Phrase\", \"Suggested Link URL\"]\n",
+ "\n",
+ "# Write headers to the CSV file\n",
+ "with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:\n",
+ "    csvwriter = csv.writer(csvfile)\n",
+ "    csvwriter.writerow(csv_headers)\n",
+ "\n",
+ "# Loop through each blog post content\n",
+ "for item in crawl_result:\n",
+ "    current_blog_url = item[\"metadata\"].get(\"ogUrl\", \"\")\n",
+ "    # Skip the blog index page itself\n",
+ "    if current_blog_url == blog_url:\n",
+ "        continue\n",
+ "    current_blog_content = item[\"content\"]\n",
+ "    current_blog_title = item[\"metadata\"].get(\"title\", \"\")\n",
+ "\n",
+ "    prompt_instructions = f\"\"\"Given this blog post from {current_blog_url} called '{current_blog_title}', analyze the following blog content. Identify 0 to 3 phrases (5 words max) from the middle of the article that could be linked to other blog posts from the list of potential links provided inside of <potential_links>. Return a JSON object structured as follows:\n",
+ "\n",
+ "    {{\n",
+ "    \"link_suggestions\": [\n",
+ "      {{\n",
+ "        \"target_phrase\": \"the EXACT phrase from the <blog_content> to be linked to one of the links in <potential_links> (5 words max)\",\n",
+ "        \"suggested_link_url\": \"url of the suggested internal link from <potential_links>\"\n",
+ "      }}\n",
+ "    ],\n",
+ "    \"metadata\": {{\n",
+ "      \"source_blog_url\": \"{current_blog_url}\",\n",
+ "      \"source_blog_title\": \"{current_blog_title}\"\n",
+ "    }}\n",
+ "  }}\n",
+ "\n",
+ "    Ensure that you provide the EXACT phrase from <blog_content> in target_phrase (5 words max) to locate each suggestion in the blog content without using character positions. Your target phrases must NOT be a title!\n",
+ "\n",
+ "    Blog Content:\n",
+ "    <blog_content>\n",
+ "    {current_blog_content}\n",
+ "    </blog_content>\n",
+ "\n",
+ "    Potential Links:\n",
+ "    <potential_links>\n",
+ "    {potential_links_json}\n",
+ "    </potential_links>\n",
+ "\n",
+ "    GO AND ONLY RETURN THE JSON NOTHING ELSE:\"\"\"\n",
+ "\n",
+ "    try:\n",
+ "        message = client.messages.create(\n",
+ "            model=\"claude-3-5-sonnet-20240620\",\n",
+ "            max_tokens=1024,\n",
+ "            messages=[\n",
+ "                {\"role\": \"user\", \"content\": prompt_instructions}\n",
+ "            ]\n",
+ "        )\n",
+ "\n",
+ "        # Extract the JSON string from the TextBlock\n",
+ "        json_string = message.content[0].text\n",
+ "\n",
+ "        # Parse the JSON response\n",
+ "        response_json = json.loads(json_string)\n",
+ "\n",
+ "        # Write suggestions to CSV\n",
+ "        for suggestion in response_json['link_suggestions']:\n",
+ "            print(\"Suggestion for: \" + current_blog_title)\n",
+ "            print(\"Blog phrase: \" + suggestion['target_phrase'])\n",
+ "            print(\"Internal Link: \" + suggestion['suggested_link_url'])\n",
+ "            print(\"---\\n\\n\")\n",
+ "\n",
+ "            # Open the CSV file in append mode and write the new row\n",
+ "            with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:\n",
+ "                csvwriter = csv.writer(csvfile)\n",
+ "                csvwriter.writerow([\n",
+ "                    response_json['metadata']['source_blog_title'],\n",
+ "                    response_json['metadata']['source_blog_url'],\n",
+ "                    suggestion['target_phrase'],\n",
+ "                    suggestion['suggested_link_url'],\n",
+ "                ])\n",
+ "\n",
+ "    except json.JSONDecodeError:\n",
+ "        print(f\"Error parsing JSON response for blog {current_blog_title}\")\n",
+ "        print(\"Raw response:\", message.content)\n",
+ "    except Exception as e:\n",
+ "        print(f\"Error processing blog {current_blog_title}: {str(e)}\")\n",
+ "\n",
+ "print(f\"Finished processing all blog posts. Results saved to {csv_filename}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}