Merge branch 'main' into mog/concurrency-limit-2

Nicolas 2024-10-30 17:05:40 -03:00
commit ea85b1d602
28 changed files with 974 additions and 134 deletions

View File

@ -80,6 +80,7 @@ To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and ge
- **Media parsing**: pdfs, docx, images.
- **Reliability first**: designed to get the data you need - no matter how hard it is.
- **Actions**: click, scroll, input, wait and more before extracting data
- **Batching (New)**: scrape thousands of URLs at the same time with a new async endpoint
You can find all of Firecrawl's capabilities and how to use them in our [documentation](https://docs.firecrawl.dev)
@ -350,6 +351,19 @@ curl -X POST https://api.firecrawl.dev/v1/scrape \
}'
```
### Batch Scraping Multiple URLs (New)
You can now batch scrape multiple URLs at the same time. It works much like the `/crawl` endpoint: it submits a batch scrape job and returns a job ID that you can use to check the status of the batch scrape.
```bash
curl -X POST https://api.firecrawl.dev/v1/batch/scrape \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer YOUR_API_KEY' \
-d '{
"urls": ["https://docs.firecrawl.dev", "https://docs.firecrawl.dev/sdks/overview"],
"formats" : ["markdown", "html"]
}'
```
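
The returned job ID can then be polled for status and results. A minimal sketch, assuming the status route mirrors the `/crawl` one (`GET /v1/batch/scrape/{id}`; replace `123-456-789` with the ID returned by the submit call):

```bash
curl -X GET https://api.firecrawl.dev/v1/batch/scrape/123-456-789 \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer YOUR_API_KEY'
```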
### Search (v0) (Beta)
@ -483,7 +497,7 @@ const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
scrapeOptions: {
formats: ['markdown', 'html'],
}
} as CrawlParams, true, 30) as CrawlStatusResponse;
} satisfies CrawlParams, true, 30) satisfies CrawlStatusResponse;
if (crawlResponse) {
console.log(crawlResponse)

View File

@ -1,5 +1,5 @@
# ===== Required ENVS ======
NUM_WORKERS_PER_QUEUE=8
NUM_WORKERS_PER_QUEUE=8
PORT=3002
HOST=0.0.0.0
REDIS_URL=redis://redis:6379 #for self-hosting using docker, use redis://redis:6379. For running locally, use redis://localhost:6379
@ -11,9 +11,14 @@ USE_DB_AUTHENTICATION=true
# ===== Optional ENVS ======
# SearchApi key. Head to https://www.searchapi.io/ to get your API key
SEARCHAPI_API_KEY=
# SearchApi engine, defaults to google. Available options: google, bing, baidu, google_news, etc. Head to https://www.searchapi.io/ to explore more engines
SEARCHAPI_ENGINE=
# Supabase Setup (used to support DB authentication, advanced logging, etc.)
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_ANON_TOKEN=
SUPABASE_URL=
SUPABASE_SERVICE_TOKEN=
# Other Optionals

View File

@ -12,4 +12,4 @@ ANTHROPIC_API_KEY=
BULL_AUTH_KEY=
LOGTAIL_KEY=
PLAYWRIGHT_MICROSERVICE_URL=
SEARCHAPI_API_KEY=

View File

@ -75,15 +75,19 @@ export async function setCachedACUC(
export async function getACUC(
api_key: string,
cacheOnly = false
cacheOnly = false,
useCache = true
): Promise<AuthCreditUsageChunk | null> {
const cacheKeyACUC = `acuc_${api_key}`;
const cachedACUC = await getValue(cacheKeyACUC);
if (useCache) {
const cachedACUC = await getValue(cacheKeyACUC);
if (cachedACUC !== null) {
return JSON.parse(cachedACUC);
}
}
if (cachedACUC !== null) {
return JSON.parse(cachedACUC);
} else if (!cacheOnly) {
if (!cacheOnly) {
let data;
let error;
let retries = 0;
@ -91,7 +95,7 @@ export async function getACUC(
while (retries < maxRetries) {
({ data, error } = await supabase_service.rpc(
"auth_credit_usage_chunk_test_3",
"auth_credit_usage_chunk_test_21_credit_pack",
{ input_key: api_key }
));
@ -118,9 +122,11 @@ export async function getACUC(
data.length === 0 ? null : data[0].team_id === null ? null : data[0];
// NOTE: Should we cache null chunks? - mogery
if (chunk !== null) {
if (chunk !== null && useCache) {
setCachedACUC(api_key, chunk);
}
// console.log(chunk);
return chunk;
} else {
@ -348,9 +354,12 @@ function getPlanByPriceId(price_id: string): PlanType {
return "standardnew";
case process.env.STRIPE_PRICE_ID_GROWTH:
case process.env.STRIPE_PRICE_ID_GROWTH_YEARLY:
case process.env.STRIPE_PRICE_ID_SCALE_2M:
return "growth";
case process.env.STRIPE_PRICE_ID_GROWTH_DOUBLE_MONTHLY:
return "growthdouble";
case process.env.STRIPE_PRICE_ID_ETIER2C:
return "etier2c";
default:
return "free";
}

View File

@ -4,6 +4,7 @@ import {
BatchScrapeRequest,
batchScrapeRequestSchema,
CrawlResponse,
legacyExtractorOptions,
legacyScrapeOptions,
RequestWithAuth,
} from "./types";
@ -35,6 +36,8 @@ export async function batchScrapeController(
}
const pageOptions = legacyScrapeOptions(req.body);
const extractorOptions = req.body.extract ? legacyExtractorOptions(req.body.extract) : undefined;
const sc: StoredCrawl = {
crawlerOptions: null,
@ -64,6 +67,7 @@ export async function batchScrapeController(
plan: req.auth.plan,
crawlerOptions: null,
pageOptions,
extractorOptions,
origin: "api",
crawl_id: id,
sitemapped: true,

View File

@ -109,13 +109,26 @@ export const scrapeOptions = z.object({
extract: extractOptions.optional(),
parsePDF: z.boolean().default(true),
actions: actionsSchema.optional(),
// New
location: z.object({
country: z.string().optional().refine(
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
{
message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
}
).transform(val => val ? val.toUpperCase() : 'US'),
languages: z.string().array().optional(),
}).optional(),
// Deprecated
geolocation: z.object({
country: z.string().optional().refine(
(val) => !val || Object.keys(countries).includes(val.toUpperCase()),
{
message: "Invalid country code. Please use a valid ISO 3166-1 alpha-2 country code.",
}
).transform(val => val ? val.toUpperCase() : 'US')
).transform(val => val ? val.toUpperCase() : 'US'),
languages: z.string().array().optional(),
}).optional(),
skipTlsVerification: z.boolean().default(false),
}).strict(strictMessage)
@ -362,6 +375,8 @@ export type AuthCreditUsageChunk = {
coupons: any[];
adjusted_credits_used: number; // credits this period minus coupons used
remaining_credits: number;
sub_user_id: string | null;
total_credits_sum: number;
};
export interface RequestWithMaybeACUC<
@ -443,7 +458,7 @@ export function legacyScrapeOptions(x: ScrapeOptions): PageOptions {
fullPageScreenshot: x.formats.includes("screenshot@fullPage"),
parsePDF: x.parsePDF,
actions: x.actions as Action[], // no strict null checking grrrr - mogery
geolocation: x.geolocation,
geolocation: x.location ?? x.geolocation,
skipTlsVerification: x.skipTlsVerification
};
}

View File

@ -70,6 +70,10 @@ export async function getJobPriority({
bucketLimit = 400;
planModifier = 0.1;
break;
case "etier2c":
bucketLimit = 1000;
planModifier = 0.05;
break;
default:
bucketLimit = 25;

View File

@ -121,8 +121,13 @@ export async function runWebScraper({
: docs;
if(is_scrape === false) {
billTeam(team_id, undefined, filteredDocs.length).catch(error => {
Logger.error(`Failed to bill team ${team_id} for ${filteredDocs.length} credits: ${error}`);
let creditsToBeBilled = 1; // Assuming 1 credit per document
if (extractorOptions && (extractorOptions.mode === "llm-extraction" || extractorOptions.mode === "extract")) {
creditsToBeBilled = 5;
}
billTeam(team_id, undefined, creditsToBeBilled * filteredDocs.length).catch(error => {
Logger.error(`Failed to bill team ${team_id} for ${creditsToBeBilled * filteredDocs.length} credits: ${error}`);
// Optionally, you could notify an admin or add to a retry queue here
});
}

View File

@ -209,14 +209,15 @@ export async function scrapSingleUrl(
if (action.type === "click" || action.type === "write" || action.type === "press") {
const result: Action[] = [];
// Don't add a wait if the previous action is a wait
if (index === 0 || array[index - 1].type !== "wait") {
result.push({ type: "wait", milliseconds: 1200 } as Action);
}
// if (index === 0 || array[index - 1].type !== "wait") {
// result.push({ type: "wait", milliseconds: 1200 } as Action);
// }
// Fire-engine now handles wait times automatically, leaving the code here for now
result.push(action);
// Don't add a wait if the next action is a wait
if (index === array.length - 1 || array[index + 1].type !== "wait") {
result.push({ type: "wait", milliseconds: 1200 } as Action);
}
// if (index === array.length - 1 || array[index + 1].type !== "wait") {
// result.push({ type: "wait", milliseconds: 1200 } as Action);
// }
return result;
}
return [action as Action];

View File

@ -2,6 +2,7 @@ import { Logger } from "../../src/lib/logger";
import { SearchResult } from "../../src/lib/entities";
import { googleSearch } from "./googlesearch";
import { fireEngineMap } from "./fireEngine";
import { searchapi_search } from "./searchapi";
import { serper_search } from "./serper";
export async function search({
@ -30,7 +31,16 @@ export async function search({
timeout?: number;
}): Promise<SearchResult[]> {
try {
if (process.env.SEARCHAPI_API_KEY) {
return await searchapi_search(query, {
num_results,
tbs,
filter,
lang,
country,
location
});
}
if (process.env.SERPER_API_KEY) {
return await serper_search(query, {
num_results,

View File

@ -0,0 +1,60 @@
import axios from "axios";
import dotenv from "dotenv";
import { SearchResult } from "../../src/lib/entities";
dotenv.config();
interface SearchOptions {
tbs?: string;
filter?: string;
lang?: string;
country?: string;
location?: string;
num_results: number;
page?: number;
}
export async function searchapi_search(q: string, options: SearchOptions): Promise<SearchResult[]> {
const params = {
q: q,
hl: options.lang,
gl: options.country,
location: options.location,
num: options.num_results,
page: options.page ?? 1,
engine: process.env.SEARCHAPI_ENGINE || "google",
};
const url = `https://www.searchapi.io/api/v1/search`;
try {
const response = await axios.get(url, {
headers: {
"Authorization": `Bearer ${process.env.SEARCHAPI_API_KEY}`,
"Content-Type": "application/json",
"X-SearchApi-Source": "Firecrawl",
},
params: params,
});
if (response.status === 401) {
throw new Error("Unauthorized. Please check your API key.");
}
const data = response.data;
if (data && Array.isArray(data.organic_results)) {
return data.organic_results.map((a: any) => ({
url: a.link,
title: a.title,
description: a.snippet,
}));
} else {
return [];
}
} catch (error) {
console.error(`There was an error searching for content: ${error.message}`);
return [];
}
}

View File

@ -0,0 +1,176 @@
// Import necessary dependencies and types
import { AuthCreditUsageChunk } from "../../controllers/v1/types";
import { getACUC, setCachedACUC } from "../../controllers/auth";
import { redlock } from "../redlock";
import { supabase_service } from "../supabase";
import { createPaymentIntent } from "./stripe";
import { issueCredits } from "./issue_credits";
import { sendNotification } from "../notification/email_notification";
import { NotificationType } from "../../types";
import { deleteKey, getValue, setValue } from "../redis";
import { sendSlackWebhook } from "../alerts/slack";
import { Logger } from "../../lib/logger";
// Define the number of credits to be added during auto-recharge
const AUTO_RECHARGE_CREDITS = 1000;
const AUTO_RECHARGE_COOLDOWN = 300; // 5 minutes in seconds
/**
* Attempt to automatically charge a user's account when their credit balance falls below a threshold
* @param chunk The user's current usage data
* @param autoRechargeThreshold The credit threshold that triggers auto-recharge
*/
export async function autoCharge(
chunk: AuthCreditUsageChunk,
autoRechargeThreshold: number
): Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> {
const resource = `auto-recharge:${chunk.team_id}`;
const cooldownKey = `auto-recharge-cooldown:${chunk.team_id}`;
try {
// Check if the team is in the cooldown period
// Another check to prevent race conditions, double charging - cool down of 5 minutes
const cooldownValue = await getValue(cooldownKey);
if (cooldownValue) {
Logger.info(`Auto-recharge for team ${chunk.team_id} is in cooldown period`);
return {
success: false,
message: "Auto-recharge is in cooldown period",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
// Use a distributed lock to prevent concurrent auto-charge attempts
return await redlock.using([resource], 5000, async (signal) : Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> => {
// Recheck the condition inside the lock to prevent race conditions
const updatedChunk = await getACUC(chunk.api_key, false, false);
if (
updatedChunk &&
updatedChunk.remaining_credits < autoRechargeThreshold
) {
if (chunk.sub_user_id) {
// Fetch the customer's Stripe information
const { data: customer, error: customersError } =
await supabase_service
.from("customers")
.select("id, stripe_customer_id")
.eq("id", chunk.sub_user_id)
.single();
if (customersError) {
Logger.error(`Error fetching customer data: ${customersError}`);
return {
success: false,
message: "Error fetching customer data",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
if (customer && customer.stripe_customer_id) {
let issueCreditsSuccess = false;
// Attempt to create a payment intent
const paymentStatus = await createPaymentIntent(
chunk.team_id,
customer.stripe_customer_id
);
// If payment is successful or requires further action, issue credits
if (
paymentStatus.return_status === "succeeded" ||
paymentStatus.return_status === "requires_action"
) {
issueCreditsSuccess = await issueCredits(
chunk.team_id,
AUTO_RECHARGE_CREDITS
);
}
// Record the auto-recharge transaction
await supabase_service.from("auto_recharge_transactions").insert({
team_id: chunk.team_id,
initial_payment_status: paymentStatus.return_status,
credits_issued: issueCreditsSuccess ? AUTO_RECHARGE_CREDITS : 0,
stripe_charge_id: paymentStatus.charge_id,
});
// Send a notification if credits were successfully issued
if (issueCreditsSuccess) {
await sendNotification(
chunk.team_id,
NotificationType.AUTO_RECHARGE_SUCCESS,
chunk.sub_current_period_start,
chunk.sub_current_period_end,
chunk,
true
);
// Set cooldown period
await setValue(cooldownKey, 'true', AUTO_RECHARGE_COOLDOWN);
}
// Reset ACUC cache to reflect the new credit balance
const cacheKeyACUC = `acuc_${chunk.api_key}`;
await deleteKey(cacheKeyACUC);
if (process.env.SLACK_ADMIN_WEBHOOK_URL) {
const webhookCooldownKey = `webhook_cooldown_${chunk.team_id}`;
const isInCooldown = await getValue(webhookCooldownKey);
if (!isInCooldown) {
sendSlackWebhook(
`Auto-recharge: Team ${chunk.team_id}. ${AUTO_RECHARGE_CREDITS} credits added. Payment status: ${paymentStatus.return_status}.`,
false,
process.env.SLACK_ADMIN_WEBHOOK_URL
).catch((error) => {
Logger.debug(`Error sending slack notification: ${error}`);
});
// Set cooldown for 1 hour
await setValue(webhookCooldownKey, 'true', 60 * 60);
}
}
return {
success: true,
message: "Auto-recharge successful",
remainingCredits: chunk.remaining_credits + AUTO_RECHARGE_CREDITS,
chunk: {...chunk, remaining_credits: chunk.remaining_credits + AUTO_RECHARGE_CREDITS},
};
} else {
Logger.error("No Stripe customer ID found for user");
return {
success: false,
message: "No Stripe customer ID found for user",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
} else {
Logger.error("No sub_user_id found in chunk");
return {
success: false,
message: "No sub_user_id found in chunk",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
}
return {
success: false,
message: "No need to auto-recharge",
remainingCredits: chunk.remaining_credits,
chunk,
};
});
} catch (error) {
Logger.error(`Failed to acquire lock for auto-recharge: ${error}`);
return {
success: false,
message: "Failed to acquire lock for auto-recharge",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
}

View File

@ -6,24 +6,40 @@ import { Logger } from "../../lib/logger";
import * as Sentry from "@sentry/node";
import { AuthCreditUsageChunk } from "../../controllers/v1/types";
import { getACUC, setCachedACUC } from "../../controllers/auth";
import { issueCredits } from "./issue_credits";
import { redlock } from "../redlock";
import { autoCharge } from "./auto_charge";
import { getValue, setValue } from "../redis";
const FREE_CREDITS = 500;
/**
* If you do not know the subscription_id in the current context, pass subscription_id as undefined.
*/
export async function billTeam(team_id: string, subscription_id: string | null | undefined, credits: number) {
export async function billTeam(
team_id: string,
subscription_id: string | null | undefined,
credits: number
) {
return withAuth(supaBillTeam)(team_id, subscription_id, credits);
}
export async function supaBillTeam(team_id: string, subscription_id: string, credits: number) {
export async function supaBillTeam(
team_id: string,
subscription_id: string,
credits: number
) {
if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used" };
}
Logger.info(`Billing team ${team_id} for ${credits} credits`);
const { data, error } =
await supabase_service.rpc("bill_team", { _team_id: team_id, sub_id: subscription_id ?? null, fetch_subscription: subscription_id === undefined, credits });
const { data, error } = await supabase_service.rpc("bill_team", {
_team_id: team_id,
sub_id: subscription_id ?? null,
fetch_subscription: subscription_id === undefined,
credits,
});
if (error) {
Sentry.captureException(error);
Logger.error("Failed to bill team: " + JSON.stringify(error));
@ -31,48 +47,109 @@ export async function supaBillTeam(team_id: string, subscription_id: string, cre
}
(async () => {
for (const apiKey of (data ?? []).map(x => x.api_key)) {
await setCachedACUC(apiKey, acuc => (acuc ? {
...acuc,
credits_used: acuc.credits_used + credits,
adjusted_credits_used: acuc.adjusted_credits_used + credits,
remaining_credits: acuc.remaining_credits - credits,
} : null));
for (const apiKey of (data ?? []).map((x) => x.api_key)) {
await setCachedACUC(apiKey, (acuc) =>
acuc
? {
...acuc,
credits_used: acuc.credits_used + credits,
adjusted_credits_used: acuc.adjusted_credits_used + credits,
remaining_credits: acuc.remaining_credits - credits,
}
: null
);
}
})();
}
export async function checkTeamCredits(chunk: AuthCreditUsageChunk, team_id: string, credits: number) {
return withAuth(supaCheckTeamCredits)(chunk, team_id, credits);
export async function checkTeamCredits(
chunk: AuthCreditUsageChunk,
team_id: string,
credits: number
): Promise<{ success: boolean; message: string; remainingCredits: number; chunk: AuthCreditUsageChunk }> {
const result = await withAuth(supaCheckTeamCredits)(chunk, team_id, credits);
return {
success: result.success,
message: result.message,
remainingCredits: result.remainingCredits,
chunk: chunk // Ensure chunk is always returned
};
}
// if team has enough credits for the operation, return true, else return false
export async function supaCheckTeamCredits(chunk: AuthCreditUsageChunk, team_id: string, credits: number) {
export async function supaCheckTeamCredits(
chunk: AuthCreditUsageChunk,
team_id: string,
credits: number
) {
// WARNING: chunk will be null if team_id is preview -- do not perform operations on it under ANY circumstances - mogery
if (team_id === "preview") {
return { success: true, message: "Preview team, no credits used", remainingCredits: Infinity };
return {
success: true,
message: "Preview team, no credits used",
remainingCredits: Infinity,
};
}
const creditsWillBeUsed = chunk.adjusted_credits_used + credits;
// In case chunk.price_credits is undefined, set it to a large number to avoid mistakes
const totalPriceCredits = chunk.price_credits ?? 100000000;
const totalPriceCredits = chunk.total_credits_sum ?? 100000000;
// Removal of + credits
const creditUsagePercentage = chunk.adjusted_credits_used / totalPriceCredits;
let isAutoRechargeEnabled = false, autoRechargeThreshold = 1000;
const cacheKey = `team_auto_recharge_${team_id}`;
let cachedData = await getValue(cacheKey);
if (cachedData) {
const parsedData = JSON.parse(cachedData);
isAutoRechargeEnabled = parsedData.auto_recharge;
autoRechargeThreshold = parsedData.auto_recharge_threshold;
} else {
const { data, error } = await supabase_service
.from("teams")
.select("auto_recharge, auto_recharge_threshold")
.eq("id", team_id)
.single();
if (data) {
isAutoRechargeEnabled = data.auto_recharge;
autoRechargeThreshold = data.auto_recharge_threshold;
await setValue(cacheKey, JSON.stringify(data), 300); // Cache for 5 minutes (300 seconds)
}
}
if (isAutoRechargeEnabled && chunk.remaining_credits < autoRechargeThreshold) {
const autoChargeResult = await autoCharge(chunk, autoRechargeThreshold);
if (autoChargeResult.success) {
return {
success: true,
message: autoChargeResult.message,
remainingCredits: autoChargeResult.remainingCredits,
chunk: autoChargeResult.chunk,
};
}
}
// Compare the adjusted total credits used with the credits allowed by the plan
if (creditsWillBeUsed > totalPriceCredits) {
// Only notify if their actual credits used (not what they will use) is greater than the total price credits
if(chunk.adjusted_credits_used > totalPriceCredits) {
if (chunk.adjusted_credits_used > totalPriceCredits) {
sendNotification(
team_id,
NotificationType.LIMIT_REACHED,
chunk.sub_current_period_start,
chunk.sub_current_period_end,
chunk
);
}
return { success: false, message: "Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing.", remainingCredits: chunk.remaining_credits, chunk };
NotificationType.LIMIT_REACHED,
chunk.sub_current_period_start,
chunk.sub_current_period_end,
chunk
);
}
return {
success: false,
message:
"Insufficient credits to perform this request. For more credits, you can upgrade your plan at https://firecrawl.dev/pricing.",
remainingCredits: chunk.remaining_credits,
chunk,
};
} else if (creditUsagePercentage >= 0.8 && creditUsagePercentage < 1) {
// Send email notification for approaching credit limit
sendNotification(
@ -84,7 +161,12 @@ export async function supaCheckTeamCredits(chunk: AuthCreditUsageChunk, team_id:
);
}
return { success: true, message: "Sufficient credits available", remainingCredits: chunk.remaining_credits, chunk };
return {
success: true,
message: "Sufficient credits available",
remainingCredits: chunk.remaining_credits,
chunk,
};
}
// Count the total credits used by a team within the current billing period and return the remaining credits.

View File

@ -0,0 +1,20 @@
import { Logger } from "../../lib/logger";
import { supabase_service } from "../supabase";
export async function issueCredits(team_id: string, credits: number) {
// Add an entry to supabase coupons
const { data, error } = await supabase_service.from("coupons").insert({
team_id: team_id,
credits: credits,
status: "active",
// indicates that this coupon was issued from auto recharge
from_auto_recharge: true,
});
if (error) {
Logger.error(`Error adding coupon: ${error}`);
return false;
}
return true;
}

View File

@ -0,0 +1,56 @@
import { Logger } from "../../lib/logger";
import Stripe from "stripe";
const stripe = new Stripe(process.env.STRIPE_SECRET_KEY ?? "");
async function getCustomerDefaultPaymentMethod(customerId: string) {
const paymentMethods = await stripe.customers.listPaymentMethods(customerId, {
limit: 3,
});
return paymentMethods.data[0] ?? null;
}
type ReturnStatus = "succeeded" | "requires_action" | "failed";
export async function createPaymentIntent(
team_id: string,
customer_id: string
): Promise<{ return_status: ReturnStatus; charge_id: string }> {
try {
const defaultPaymentMethod = await getCustomerDefaultPaymentMethod(customer_id);
if (!defaultPaymentMethod) {
Logger.error(`No default payment method found for customer: ${customer_id}`);
return { return_status: "failed", charge_id: "" };
}
const paymentIntent = await stripe.paymentIntents.create({
amount: 1100,
currency: "usd",
customer: customer_id,
description: "Firecrawl: Auto re-charge of 1000 credits",
payment_method_types: [defaultPaymentMethod?.type ?? "card"],
payment_method: defaultPaymentMethod?.id,
off_session: true,
confirm: true,
});
if (paymentIntent.status === "succeeded") {
Logger.info(`Payment succeeded for team: ${team_id}`);
return { return_status: "succeeded", charge_id: paymentIntent.id };
} else if (
paymentIntent.status === "requires_action" ||
paymentIntent.status === "processing" ||
paymentIntent.status === "requires_capture"
) {
Logger.warn(`Payment requires further action for team: ${team_id}`);
return { return_status: "requires_action", charge_id: paymentIntent.id };
} else {
Logger.error(`Payment failed for team: ${team_id}`);
return { return_status: "failed", charge_id: paymentIntent.id };
}
} catch (error) {
Logger.error(
`Failed to create or confirm PaymentIntent for team: ${team_id}`
);
console.error(error);
return { return_status: "failed", charge_id: "" };
}
}

View File

@ -70,7 +70,9 @@ export async function logJob(job: FirecrawlJob) {
retry: job.retry,
},
};
posthog.capture(phLog);
if(job.mode !== "single_urls") {
posthog.capture(phLog);
}
}
if (error) {
Logger.error(`Error logging job: ${error.message}`);

View File

@ -24,6 +24,14 @@ const emailTemplates: Record<
subject: "Rate Limit Reached - Firecrawl",
html: "Hey there,<br/><p>You've hit one of the Firecrawl endpoint's rate limit! Take a breather and try again in a few moments. If you need higher rate limits, consider upgrading your plan. Check out our <a href='https://firecrawl.dev/pricing'>pricing page</a> for more info.</p><p>If you have any questions, feel free to reach out to us at <a href='mailto:hello@firecrawl.com'>hello@firecrawl.com</a></p><br/>Thanks,<br/>Firecrawl Team<br/><br/>Ps. this email is only sent once every 7 days if you reach a rate limit.",
},
[NotificationType.AUTO_RECHARGE_SUCCESS]: {
subject: "Auto recharge successful - Firecrawl",
html: "Hey there,<br/><p>Your account was successfully recharged with 1000 credits because your remaining credits were below the threshold. Consider upgrading your plan at <a href='https://firecrawl.dev/pricing'>firecrawl.dev/pricing</a> to avoid hitting the limit.</p><br/>Thanks,<br/>Firecrawl Team<br/>",
},
[NotificationType.AUTO_RECHARGE_FAILED]: {
subject: "Auto recharge failed - Firecrawl",
html: "Hey there,<br/><p>Your auto recharge failed. Please try again manually. If the issue persists, please reach out to us at <a href='mailto:hello@firecrawl.com'>hello@firecrawl.com</a></p><br/>Thanks,<br/>Firecrawl Team<br/>",
},
};
export async function sendNotification(
@ -31,18 +39,20 @@ export async function sendNotification(
notificationType: NotificationType,
startDateString: string,
endDateString: string,
chunk: AuthCreditUsageChunk
chunk: AuthCreditUsageChunk,
bypassRecentChecks: boolean = false
) {
return withAuth(sendNotificationInternal)(
team_id,
notificationType,
startDateString,
endDateString,
chunk
chunk,
bypassRecentChecks
);
}
async function sendEmailNotification(
export async function sendEmailNotification(
email: string,
notificationType: NotificationType,
) {
@ -72,90 +82,94 @@ export async function sendNotificationInternal(
notificationType: NotificationType,
startDateString: string,
endDateString: string,
chunk: AuthCreditUsageChunk
chunk: AuthCreditUsageChunk,
bypassRecentChecks: boolean = false
): Promise<{ success: boolean }> {
if (team_id === "preview") {
return { success: true };
}
const fifteenDaysAgo = new Date();
fifteenDaysAgo.setDate(fifteenDaysAgo.getDate() - 15);
if (!bypassRecentChecks) {
const fifteenDaysAgo = new Date();
fifteenDaysAgo.setDate(fifteenDaysAgo.getDate() - 15);
const { data, error } = await supabase_service
.from("user_notifications")
.select("*")
.eq("team_id", team_id)
.eq("notification_type", notificationType)
.gte("sent_date", fifteenDaysAgo.toISOString());
if (error) {
Logger.debug(`Error fetching notifications: ${error}`);
return { success: false };
}
if (data.length !== 0) {
// Logger.debug(`Notification already sent for team_id: ${team_id} and notificationType: ${notificationType} in the last 15 days`);
return { success: false };
}
const { data: recentData, error: recentError } = await supabase_service
.from("user_notifications")
.select("*")
.eq("team_id", team_id)
.eq("notification_type", notificationType)
.gte("sent_date", startDateString)
.lte("sent_date", endDateString);
if (recentError) {
Logger.debug(`Error fetching recent notifications: ${recentError}`);
return { success: false };
}
if (recentData.length !== 0) {
// Logger.debug(`Notification already sent for team_id: ${team_id} and notificationType: ${notificationType} within the specified date range`);
return { success: false };
} else {
console.log(`Sending notification for team_id: ${team_id} and notificationType: ${notificationType}`);
// get the emails from the user with the team_id
const { data: emails, error: emailsError } = await supabase_service
.from("users")
.select("email")
.eq("team_id", team_id);
if (emailsError) {
Logger.debug(`Error fetching emails: ${emailsError}`);
return { success: false };
}
for (const email of emails) {
await sendEmailNotification(email.email, notificationType);
}
const { error: insertError } = await supabase_service
const { data, error } = await supabase_service
.from("user_notifications")
.insert([
{
team_id: team_id,
notification_type: notificationType,
sent_date: new Date().toISOString(),
},
]);
.select("*")
.eq("team_id", team_id)
.eq("notification_type", notificationType)
.gte("sent_date", fifteenDaysAgo.toISOString());
if (process.env.SLACK_ADMIN_WEBHOOK_URL && emails.length > 0) {
sendSlackWebhook(
`${getNotificationString(notificationType)}: Team ${team_id}, with email ${emails[0].email}. Number of credits used: ${chunk.adjusted_credits_used} | Number of credits in the plan: ${chunk.price_credits}`,
false,
process.env.SLACK_ADMIN_WEBHOOK_URL
).catch((error) => {
Logger.debug(`Error sending slack notification: ${error}`);
});
}
if (insertError) {
Logger.debug(`Error inserting notification record: ${insertError}`);
if (error) {
Logger.debug(`Error fetching notifications: ${error}`);
return { success: false };
}
return { success: true };
if (data.length !== 0) {
return { success: false };
}
// TODO: observation: Free credits people are not receiving notifications
const { data: recentData, error: recentError } = await supabase_service
.from("user_notifications")
.select("*")
.eq("team_id", team_id)
.eq("notification_type", notificationType)
.gte("sent_date", startDateString)
.lte("sent_date", endDateString);
if (recentError) {
Logger.debug(`Error fetching recent notifications: ${recentError.message}`);
return { success: false };
}
if (recentData.length !== 0) {
return { success: false };
}
}
console.log(`Sending notification for team_id: ${team_id} and notificationType: ${notificationType}`);
// get the emails from the user with the team_id
const { data: emails, error: emailsError } = await supabase_service
.from("users")
.select("email")
.eq("team_id", team_id);
if (emailsError) {
Logger.debug(`Error fetching emails: ${emailsError}`);
return { success: false };
}
for (const email of emails) {
await sendEmailNotification(email.email, notificationType);
}
const { error: insertError } = await supabase_service
.from("user_notifications")
.insert([
{
team_id: team_id,
notification_type: notificationType,
sent_date: new Date().toISOString(),
},
]);
if (process.env.SLACK_ADMIN_WEBHOOK_URL && emails.length > 0) {
sendSlackWebhook(
`${getNotificationString(notificationType)}: Team ${team_id}, with email ${emails[0].email}. Number of credits used: ${chunk.adjusted_credits_used} | Number of credits in the plan: ${chunk.price_credits}`,
false,
process.env.SLACK_ADMIN_WEBHOOK_URL
).catch((error) => {
Logger.debug(`Error sending slack notification: ${error}`);
});
}
if (insertError) {
Logger.debug(`Error inserting notification record: ${insertError}`);
return { success: false };
}
return { success: true };
}

View File

@ -11,6 +11,10 @@ export function getNotificationString(
return "Limit reached (100%)";
case NotificationType.RATE_LIMIT_REACHED:
return "Rate limit reached";
case NotificationType.AUTO_RECHARGE_SUCCESS:
return "Auto-recharge successful";
case NotificationType.AUTO_RECHARGE_FAILED:
return "Auto-recharge failed";
default:
return "Unknown notification type";
}

View File

@ -15,6 +15,7 @@ const RATE_LIMITS = {
standardnew: 10,
growth: 50,
growthdouble: 50,
etier2c: 300,
},
scrape: {
default: 20,
@ -28,6 +29,7 @@ const RATE_LIMITS = {
standardnew: 100,
growth: 1000,
growthdouble: 1000,
etier2c: 2500,
},
search: {
default: 20,
@ -41,6 +43,7 @@ const RATE_LIMITS = {
standardnew: 50,
growth: 500,
growthdouble: 500,
etier2c: 2500,
},
map:{
default: 20,
@ -54,6 +57,7 @@ const RATE_LIMITS = {
standardnew: 50,
growth: 500,
growthdouble: 500,
etier2c: 2500,
},
preview: {
free: 5,

View File

@ -130,6 +130,8 @@ export enum NotificationType {
APPROACHING_LIMIT = "approachingLimit",
LIMIT_REACHED = "limitReached",
RATE_LIMIT_REACHED = "rateLimitReached",
AUTO_RECHARGE_SUCCESS = "autoRechargeSuccess",
AUTO_RECHARGE_FAILED = "autoRechargeFailed",
}
export type ScrapeLog = {
@ -155,6 +157,7 @@ export type PlanType =
| "standardnew"
| "growth"
| "growthdouble"
| "etier2c"
| "free"
| "";

View File

@ -147,7 +147,7 @@ watch.addEventListener("done", state => {
### Batch scraping multiple URLs
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the output formats.
To batch scrape multiple URLs with error handling, use the `batchScrapeUrls` method. It takes the starting URLs and optional parameters as arguments. The `params` argument allows you to specify additional options for the batch scrape job, such as the output formats.
```js
const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], {
@ -158,10 +158,10 @@ const batchScrapeResponse = await app.batchScrapeUrls(['https://firecrawl.dev',
#### Asynchronous batch scrape
To initiate an asynchronous batch scrape, utilize the `asyncBulkScrapeUrls` method. This method requires the starting URLs and optional parameters as inputs. The params argument enables you to define various settings for the scrape, such as the output formats. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the batch scrape.
To initiate an asynchronous batch scrape, utilize the `asyncBatchScrapeUrls` method. This method requires the starting URLs and optional parameters as inputs. The params argument enables you to define various settings for the scrape, such as the output formats. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the batch scrape.
```js
const asyncBulkScrapeResult = await app.asyncBulkScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
const asyncBatchScrapeResult = await app.asyncBatchScrapeUrls(['https://firecrawl.dev', 'https://mendable.ai'], { formats: ['markdown', 'html'] });
```
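
The returned ID can then be used to poll the job until it finishes. A minimal sketch, assuming the Node SDK exposes a `checkBatchScrapeStatus` method mirroring the Python `check_batch_scrape_status` shown later in this changeset:

```js
// Check the status of the batch scrape job by its ID (method name assumed)
const batchScrapeStatus = await app.checkBatchScrapeStatus(asyncBatchScrapeResult.id);
console.log(batchScrapeStatus);
```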
#### Batch scrape with WebSockets

View File

@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "1.7.1",
"version": "1.7.2",
"description": "JavaScript SDK for Firecrawl API",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@ -82,6 +82,10 @@ export interface CrawlScrapeOptions {
onlyMainContent?: boolean;
waitFor?: number;
timeout?: number;
location?: {
country?: string;
languages?: string[];
};
}
export type Action = {

View File

@ -170,11 +170,11 @@ print(batch_scrape_result)
### Checking batch scrape status
To check the status of an asynchronous batch scrape job, use the `check_batch_scrape_job` method. It takes the job ID as a parameter and returns the current status of the batch scrape job.
To check the status of an asynchronous batch scrape job, use the `check_batch_scrape_status` method. It takes the job ID as a parameter and returns the current status of the batch scrape job.
```python
id = batch_scrape_result['id']
status = app.check_batch_scrape_job(id)
status = app.check_batch_scrape_status(id)
```
### Batch scrape with WebSockets

View File

@ -0,0 +1,180 @@
import os
from firecrawl import FirecrawlApp
import json
from dotenv import load_dotenv
import anthropic
from e2b_code_interpreter import Sandbox
import base64
# ANSI color codes
class Colors:
CYAN = '\033[96m'
YELLOW = '\033[93m'
GREEN = '\033[92m'
RED = '\033[91m'
MAGENTA = '\033[95m'
BLUE = '\033[94m'
RESET = '\033[0m'
# Load environment variables
load_dotenv()
# Retrieve API keys from environment variables
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
e2b_api_key = os.getenv("E2B_API_KEY")
# Initialize the FirecrawlApp and Anthropic client
app = FirecrawlApp(api_key=firecrawl_api_key)
client = anthropic.Anthropic(api_key=anthropic_api_key)
sandbox = Sandbox(api_key=e2b_api_key)
# Find the relevant stock pages via map
def find_relevant_page_via_map(stock_search_term, url, app):
try:
print(f"{Colors.CYAN}Searching for stock: {stock_search_term}{Colors.RESET}")
print(f"{Colors.CYAN}Initiating search on the website: {url}{Colors.RESET}")
map_search_parameter = stock_search_term
print(f"{Colors.GREEN}Search parameter: {map_search_parameter}{Colors.RESET}")
print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}")
map_website = app.map_url(url, params={"search": map_search_parameter})
print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}")
print(f"{Colors.GREEN}Located {len(map_website['links'])} relevant links.{Colors.RESET}")
return map_website['links']
except Exception as e:
print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}")
return None
# Function to plot the scores using e2b
def plot_scores(stock_names, stock_scores):
print(f"{Colors.YELLOW}Plotting scores...{Colors.RESET}")
code_to_run = f"""
import matplotlib.pyplot as plt
stock_names = {stock_names}
stock_scores = {stock_scores}
plt.figure(figsize=(10, 5))
plt.bar(stock_names, stock_scores, color='blue')
plt.xlabel('Stock Names')
plt.ylabel('Scores')
plt.title('Stock Investment Scores')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('chart.png')
plt.show()
"""
# Run the code inside the sandbox
execution = sandbox.run_code(code_to_run)
# Check if there are any results
if execution.results and execution.results[0].png:
first_result = execution.results[0]
# Get the directory where the current python file is located
current_dir = os.path.dirname(os.path.abspath(__file__))
# Save the png to a file in the examples directory. The png is in base64 format.
with open(os.path.join(current_dir, 'chart.png'), 'wb') as f:
f.write(base64.b64decode(first_result.png))
print('Chart saved as examples/chart.png')
else:
print(f"{Colors.RED}No results returned from the sandbox execution.{Colors.RESET}")
# Analyze the top stocks and provide investment recommendation
def analyze_top_stocks(map_website, app, client):
try:
# Get top 10 links from the map result
top_links = map_website[:10]
print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}")
# Scrape the pages in batch
batch_scrape_result = app.batch_scrape_urls(top_links, {'formats': ['markdown']})
print(f"{Colors.GREEN}Batch page scraping completed successfully.{Colors.RESET}")
# Prepare content for LLM
stock_contents = []
for scrape_result in batch_scrape_result['data']:
stock_contents.append({
'content': scrape_result['markdown']
})
# Pass all the content to the LLM to analyze and decide which stock to invest in
analyze_prompt = f"""
Based on the following information about different stocks from their Robinhood pages, analyze and determine which stock is the best investment opportunity. DO NOT include any other text, just the JSON.
Return the result in the following JSON format. Only return the JSON, nothing else. Do not include backticks or any other formatting, just the JSON.
{{
"scores": [
{{
"stock_name": "<stock_name>",
"score": <score-out-of-100>
}},
...
]
}}
Stock Information:
"""
for stock in stock_contents:
analyze_prompt += f"Content:\n{stock['content']}\n"
print(f"{Colors.YELLOW}Analyzing stock information with LLM...{Colors.RESET}")
analyze_prompt += f"\n\nStart JSON:\n"
completion = client.messages.create(
model="claude-3-5-sonnet-20240620",
max_tokens=1000,
temperature=0,
system="You are a financial analyst. Only return the JSON, nothing else.",
messages=[
{
"role": "user",
"content": analyze_prompt
}
]
)
result = completion.content[0].text
print(f"{Colors.GREEN}Analysis completed. Here is the recommendation:{Colors.RESET}")
print(f"{Colors.MAGENTA}{result}{Colors.RESET}")
# Plot the scores using e2b
try:
result_json = json.loads(result)
scores = result_json['scores']
stock_names = [score['stock_name'] for score in scores]
stock_scores = [score['score'] for score in scores]
plot_scores(stock_names, stock_scores)
except json.JSONDecodeError as json_err:
print(f"{Colors.RED}Error decoding JSON response: {str(json_err)}{Colors.RESET}")
except Exception as e:
print(f"{Colors.RED}Error encountered during stock analysis: {str(e)}{Colors.RESET}")
# Main function to execute the process
def main():
# Get user input
stock_search_term = input(f"{Colors.BLUE}Enter the stock you're interested in: {Colors.RESET}")
if not stock_search_term.strip():
print(f"{Colors.RED}No stock entered. Exiting.{Colors.RESET}")
return
url = "https://robinhood.com/stocks"
print(f"{Colors.YELLOW}Initiating stock analysis process...{Colors.RESET}")
# Find the relevant pages
map_website = find_relevant_page_via_map(stock_search_term, url, app)
if map_website:
print(f"{Colors.GREEN}Relevant stock pages identified. Proceeding with detailed analysis...{Colors.RESET}")
# Analyze top stocks
analyze_top_stocks(map_website, app, client)
else:
print(f"{Colors.RED}No relevant stock pages identified. Consider refining the search term or trying a different stock.{Colors.RESET}")
if __name__ == "__main__":
main()

View File

@ -3,6 +3,7 @@ from firecrawl import FirecrawlApp
import json
from dotenv import load_dotenv
import anthropic
import agentops
# ANSI color codes
class Colors:
@ -161,4 +162,5 @@ def main():
print(f"{Colors.RED}No relevant pages identified. Consider refining the search parameters or trying a different website.{Colors.RESET}")
if __name__ == "__main__":
agentops.init(os.getenv("AGENTOPS_API_KEY"))
main()

View File

@ -98,7 +98,7 @@
"source": [
"# Create a cache with a 5 minute TTL\n",
"cache = caching.CachedContent.create(\n",
" model=\"models/gemini-1.5-pro-001\",\n",
" model=\"models/gemini-1.5-pro-002\",\n",
" display_name=\"website crawl testing again\", # used to identify the cache\n",
" system_instruction=\"You are an expert at this website, and your job is to answer user's query based on the website you have access to.\",\n",
" contents=[text_file],\n",

View File

@ -0,0 +1,166 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/ericciarla/projects/python_projects/agents_testing/.conda/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import os\n",
"import datetime\n",
"import time\n",
"import google.generativeai as genai\n",
"from google.generativeai import caching\n",
"from dotenv import load_dotenv\n",
"from firecrawl import FirecrawlApp\n",
"import json\n",
"\n",
"# Load environment variables\n",
"load_dotenv()\n",
"\n",
"# Retrieve API keys from environment variables\n",
"google_api_key = os.getenv(\"GOOGLE_API_KEY\")\n",
"firecrawl_api_key = os.getenv(\"FIRECRAWL_API_KEY\")\n",
"\n",
"# Configure the Google Generative AI module with the API key\n",
"genai.configure(api_key=google_api_key)\n",
"\n",
"# Initialize the FirecrawlApp with your API key\n",
"app = FirecrawlApp(api_key=firecrawl_api_key)\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"No data returned from crawl.\n"
]
}
],
"source": [
"# Crawl a website\n",
"crawl_url = 'https://dify.ai/'\n",
"params = {\n",
" \n",
" 'crawlOptions': {\n",
" 'limit': 100\n",
" }\n",
"}\n",
"crawl_result = app.crawl_url(crawl_url, params=params)\n",
"\n",
"if crawl_result is not None:\n",
" # Convert crawl results to JSON format, excluding 'content' field from each entry\n",
" cleaned_crawl_result = [{k: v for k, v in entry.items() if k != 'content'} for entry in crawl_result]\n",
"\n",
" # Save the modified results as a text file containing JSON data\n",
" with open('crawl_result.txt', 'w') as file:\n",
" file.write(json.dumps(cleaned_crawl_result, indent=4))\n",
"else:\n",
" print(\"No data returned from crawl.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Upload the video using the Files API\n",
"text_file = genai.upload_file(path=\"crawl_result.txt\")\n",
"\n",
"# Wait for the file to finish processing\n",
"while text_file.state.name == \"PROCESSING\":\n",
" print('Waiting for file to be processed.')\n",
" time.sleep(2)\n",
" text_file = genai.get_file(text_file.name)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Create a cache with a 5 minute TTL\n",
"cache = caching.CachedContent.create(\n",
" model=\"models/gemini-1.5-flash-002\",\n",
" display_name=\"website crawl testing again\", # used to identify the cache\n",
" system_instruction=\"You are an expert at this website, and your job is to answer user's query based on the website you have access to.\",\n",
" contents=[text_file],\n",
" ttl=datetime.timedelta(minutes=15),\n",
")\n",
"# Construct a GenerativeModel which uses the created cache.\n",
"model = genai.GenerativeModel.from_cached_content(cached_content=cache)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dify.AI utilizes the **Firecrawl** service for website scraping. This service can crawl and convert any website into clean markdown or structured data that's ready for use in building RAG applications. \n",
"\n",
"Here's how Firecrawl helps:\n",
"\n",
"* **Crawling and Conversion:** Firecrawl crawls the website and converts the content into a format that is easily understood by LLMs, such as markdown or structured data.\n",
"* **Clean Output:** Firecrawl ensures the data is clean and free of errors, making it easier to use in Dify's RAG engine.\n",
"* **Parallel Crawling:** Firecrawl efficiently crawls web pages in parallel, delivering results quickly.\n",
"\n",
"You can find Firecrawl on their website: [https://www.firecrawl.dev/](https://www.firecrawl.dev/)\n",
"\n",
"Firecrawl offers both a cloud service and an open-source software (OSS) edition. \n",
"\n"
]
}
],
"source": [
"# Query the model\n",
"response = model.generate_content([\"What powers website scraping with Dify?\"])\n",
"response_dict = response.to_dict()\n",
"response_text = response_dict['candidates'][0]['content']['parts'][0]['text']\n",
"print(response_text)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}