Add llmstxt generator endpoint (#1201)
* Nick:
* Revert "fix(v1/types): fix extract -> json rename (FIR-1072) (#1195)"
  This reverts commit 586a10f40d354a038afc2b67809f20a7a829f8cb.
* Update deep-research-service.ts
* Nick:
* init
* part 2
* Update generate-llmstxt-service.ts
* Fix queue
* Update queue-worker.ts
* Almost there
* Final touches
* Update requests.http
* final touches
* Update requests.http
* Improve logging
* Change endpoint to /llmstxt
* Update queue-worker.ts
* Update generate-llmstxt-service.ts
* Nick: cache
* Update index.ts
* Update firecrawl.py
* Update package.json

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
Co-authored-by: Gergő Móricz <mo.geryy@gmail.com>
This commit is contained in: parent e373fab5c1, commit d984b50400

requests.http
@@ -100,3 +100,23 @@
 ###
 DELETE {{baseUrl}}/v1/crawl/c94136f9-86c1-4a97-966c-1c8e0274778f HTTP/1.1
 Authorization: Bearer {{$dotenv TEST_API_KEY}}
+
+### Generate LLMs TXT
+# @name llmsTxt
+POST {{baseUrl}}/v1/llmstxt HTTP/1.1
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
+content-type: application/json
+
+{
+    "url": "https://firecrawl.dev",
+    "maxUrls": 2,
+    "showFullText": false
+}
+
+
+### Check Generate LLMs TXT Status
+@llmsTxtId = {{llmsTxt.response.body.$.id}}
+# @name llmsTxtStatus
+GET {{baseUrl}}/v1/llmstxt/{{llmsTxtId}} HTTP/1.1
+Authorization: Bearer {{$dotenv TEST_API_KEY}}
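
For reference, the same two-step flow works from any HTTP client. A minimal TypeScript sketch, not part of the commit; it assumes a Firecrawl API reachable at baseUrl, a valid API key, and an arbitrarily chosen polling interval:

// Sketch: start a llms.txt generation job, then poll until it settles.
const baseUrl = "http://localhost:3002"; // assumption: local dev server
const apiKey = process.env.TEST_API_KEY!;

async function generateLlmsTxt(url: string) {
  const start = await fetch(`${baseUrl}/v1/llmstxt`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "content-type": "application/json",
    },
    body: JSON.stringify({ url, maxUrls: 2, showFullText: false }),
  });
  const { id } = await start.json();

  // Poll the status endpoint until the job leaves "processing".
  while (true) {
    const res = await fetch(`${baseUrl}/v1/llmstxt/${id}`, {
      headers: { Authorization: `Bearer ${apiKey}` },
    });
    const status = await res.json();
    if (status.status !== "processing") return status;
    await new Promise((r) => setTimeout(r, 2000));
  }
}
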
apps/api/src/controllers/v1/deep-research-status.ts
@@ -21,7 +21,7 @@ export async function deepResearchStatusController(
 
   let data: any = null;
 
-  if (research.status === "completed") {
+  if (research.status === "completed" && process.env.USE_DB_AUTHENTICATION === "true") {
     const jobData = await supabaseGetJobsById([req.params.jobId]);
     if (jobData && jobData.length > 0) {
       data = jobData[0].docs[0];

apps/api/src/controllers/v1/generate-llmstxt-status.ts (new file, 41 lines)
@@ -0,0 +1,41 @@
import { Response } from "express";
import { RequestWithAuth } from "./types";
import { getGeneratedLlmsTxt, getGeneratedLlmsTxtExpiry } from "../../lib/generate-llmstxt/generate-llmstxt-redis";
import { supabaseGetJobsById } from "../../lib/supabase-jobs";

export async function generateLLMsTextStatusController(
  req: RequestWithAuth<{ jobId: string }, any, any>,
  res: Response,
) {
  const generation = await getGeneratedLlmsTxt(req.params.jobId);
  const showFullText = generation?.showFullText ?? false;

  if (!generation) {
    return res.status(404).json({
      success: false,
      error: "llmsTxt generation job not found",
    });
  }

  let data: any = null;

  if (showFullText) {
    data = {
      llmstxt: generation.generatedText,
      llmsfulltxt: generation.fullText,
    };
  } else {
    data = {
      llmstxt: generation.generatedText,
    };
  }

  return res.status(200).json({
    success: generation.status === "failed" ? false : true,
    data: data,
    status: generation.status,
    error: generation?.error ?? undefined,
    expiresAt: (await getGeneratedLlmsTxtExpiry(req.params.jobId)).toISOString(),
  });
}
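
For a completed job with showFullText enabled, the controller above responds with a payload of this shape (values illustrative):

{
  "success": true,
  "data": {
    "llmstxt": "# https://firecrawl.dev llms.txt\n\n- [Title](url): description",
    "llmsfulltxt": "# https://firecrawl.dev llms-full.txt\n\n## Title\n..."
  },
  "status": "completed",
  "expiresAt": "2025-02-20T12:00:00.000Z"
}
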
apps/api/src/controllers/v1/generate-llmstxt.ts (new file, 89 lines)
@@ -0,0 +1,89 @@
import { Response } from "express";
import { RequestWithAuth } from "./types";
import { getGenerateLlmsTxtQueue } from "../../services/queue-service";
import * as Sentry from "@sentry/node";
import { saveGeneratedLlmsTxt } from "../../lib/generate-llmstxt/generate-llmstxt-redis";
import { z } from "zod";

export const generateLLMsTextRequestSchema = z.object({
  url: z.string().url().describe('The URL to generate text from'),
  maxUrls: z.number().min(1).max(100).default(10).describe('Maximum number of URLs to process'),
  showFullText: z.boolean().default(false).describe('Whether to show the full LLMs-full.txt in the response'),
  __experimental_stream: z.boolean().optional(),
});

export type GenerateLLMsTextRequest = z.infer<typeof generateLLMsTextRequestSchema>;

export type GenerateLLMsTextResponse = {
  success: boolean;
  id: string;
};

/**
 * Initiates a text generation job based on the provided URL.
 * @param req - The request object containing authentication and generation parameters.
 * @param res - The response object to send the generation job ID.
 * @returns A promise that resolves when the generation job is queued.
 */
export async function generateLLMsTextController(
  req: RequestWithAuth<{}, GenerateLLMsTextResponse, GenerateLLMsTextRequest>,
  res: Response<GenerateLLMsTextResponse>,
) {
  req.body = generateLLMsTextRequestSchema.parse(req.body);

  const generationId = crypto.randomUUID();
  const jobData = {
    request: req.body,
    teamId: req.auth.team_id,
    plan: req.auth.plan,
    subId: req.acuc?.sub_id,
    generationId,
  };

  await saveGeneratedLlmsTxt(generationId, {
    id: generationId,
    team_id: req.auth.team_id,
    plan: req.auth.plan!, // Add non-null assertion since plan is required
    createdAt: Date.now(),
    status: "processing",
    url: req.body.url,
    maxUrls: req.body.maxUrls,
    showFullText: req.body.showFullText,
    generatedText: "",
    fullText: "",
  });

  if (Sentry.isInitialized()) {
    const size = JSON.stringify(jobData).length;
    await Sentry.startSpan(
      {
        name: "Add LLMstxt generation job",
        op: "queue.publish",
        attributes: {
          "messaging.message.id": generationId,
          "messaging.destination.name": getGenerateLlmsTxtQueue().name,
          "messaging.message.body.size": size,
        },
      },
      async (span) => {
        await getGenerateLlmsTxtQueue().add(generationId, {
          ...jobData,
          sentry: {
            trace: Sentry.spanToTraceHeader(span),
            baggage: Sentry.spanToBaggageHeader(span),
            size,
          },
        }, { jobId: generationId });
      },
    );
  } else {
    await getGenerateLlmsTxtQueue().add(generationId, jobData, {
      jobId: generationId,
    });
  }

  return res.status(200).json({
    success: true,
    id: generationId,
  });
}
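
Because maxUrls and showFullText carry zod defaults, a body containing only a url is accepted and parse() fills in the rest. A quick sketch, not part of the commit:

// Sketch: defaults applied by the schema above.
const parsed = generateLLMsTextRequestSchema.parse({ url: "https://firecrawl.dev" });
// parsed => { url: "https://firecrawl.dev", maxUrls: 10, showFullText: false }
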
apps/api/src/index.ts
@@ -8,6 +8,7 @@ import {
   getExtractQueue,
   getScrapeQueue,
   getIndexQueue,
+  getGenerateLlmsTxtQueue,
   getDeepResearchQueue,
 } from "./services/queue-service";
 import { v0Router } from "./routes/v0";
@@ -55,6 +56,7 @@ const { addQueue, removeQueue, setQueues, replaceQueues } = createBullBoard({
     new BullAdapter(getScrapeQueue()),
     new BullAdapter(getExtractQueue()),
     new BullAdapter(getIndexQueue()),
+    new BullAdapter(getGenerateLlmsTxtQueue()),
     new BullAdapter(getDeepResearchQueue()),
   ],
   serverAdapter: serverAdapter,

apps/api/src/lib/generate-llmstxt/generate-llmstxt-redis.ts (new file, 70 lines)
@@ -0,0 +1,70 @@
import { redisConnection } from "../../services/queue-service";
import { logger as _logger } from "../logger";

export interface GenerationData {
  id: string;
  team_id: string;
  plan: string;
  createdAt: number;
  status: "processing" | "completed" | "failed";
  url: string;
  maxUrls: number;
  showFullText: boolean;
  generatedText: string;
  fullText: string;
  error?: string;
}

// TTL of 24 hours
const GENERATION_TTL = 24 * 60 * 60;

export async function saveGeneratedLlmsTxt(id: string, data: GenerationData): Promise<void> {
  _logger.debug("Saving llmstxt generation " + id + " to Redis...");
  await redisConnection.set("generation:" + id, JSON.stringify(data));
  await redisConnection.expire("generation:" + id, GENERATION_TTL);
}

export async function getGeneratedLlmsTxt(id: string): Promise<GenerationData | null> {
  const x = await redisConnection.get("generation:" + id);
  return x ? JSON.parse(x) : null;
}

export async function updateGeneratedLlmsTxt(
  id: string,
  data: Partial<GenerationData>,
): Promise<void> {
  const current = await getGeneratedLlmsTxt(id);
  if (!current) return;

  const updatedGeneration = {
    ...current,
    ...data
  };

  await redisConnection.set("generation:" + id, JSON.stringify(updatedGeneration));
  await redisConnection.expire("generation:" + id, GENERATION_TTL);
}

export async function getGeneratedLlmsTxtExpiry(id: string): Promise<Date> {
  const d = new Date();
  const ttl = await redisConnection.pttl("generation:" + id);
  d.setMilliseconds(d.getMilliseconds() + ttl);
  d.setMilliseconds(0);
  return d;
}

// Convenience method for status updates
export async function updateGeneratedLlmsTxtStatus(
  id: string,
  status: "processing" | "completed" | "failed",
  generatedText?: string,
  fullText?: string,
  error?: string,
): Promise<void> {
  const updates: Partial<GenerationData> = { status };
  if (generatedText !== undefined) updates.generatedText = generatedText;
  if (fullText !== undefined) updates.fullText = fullText;
  if (error !== undefined) updates.error = error;

  await updateGeneratedLlmsTxt(id, updates);
}
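
Two details worth noting in this module: every save and update refreshes the 24-hour TTL, and getGeneratedLlmsTxtExpiry rebuilds the client-facing expiry timestamp from Redis PTTL, which reports the key's remaining lifetime in milliseconds. A small sketch, assuming a generationId that was already saved:

// Sketch: the expiry is "now + remaining PTTL", with the milliseconds
// field zeroed by setMilliseconds(0) before the Date is returned.
const expiry = await getGeneratedLlmsTxtExpiry(generationId);
console.log(expiry.toISOString()); // roughly 24h after the last save/update
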
apps/api/src/lib/generate-llmstxt/generate-llmstxt-service.ts (new file, 174 lines)
@@ -0,0 +1,174 @@
import { logger as _logger } from "../logger";
import { updateGeneratedLlmsTxt } from "./generate-llmstxt-redis";
import { getMapResults } from "../../controllers/v1/map";
import { MapResponse, ScrapeResponse, Document } from "../../controllers/v1/types";
import { Response } from "express";
import OpenAI from "openai";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
import { scrapeDocument } from "../extract/document-scraper";
import { PlanType } from "../../types";
import { getLlmsTextFromCache, saveLlmsTextToCache } from "./generate-llmstxt-supabase";

interface GenerateLLMsTextServiceOptions {
  generationId: string;
  teamId: string;
  plan: PlanType;
  url: string;
  maxUrls: number;
  showFullText: boolean;
}

const DescriptionSchema = z.object({
  description: z.string(),
  title: z.string(),
});

export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOptions) {
  const openai = new OpenAI();
  const { generationId, teamId, plan, url, maxUrls, showFullText } = options;

  const logger = _logger.child({
    module: "generate-llmstxt",
    method: "performGenerateLlmsTxt",
    generationId,
    teamId,
  });

  try {
    // Check cache first
    const cachedResult = await getLlmsTextFromCache(url, maxUrls);
    if (cachedResult) {
      logger.info("Found cached LLMs text", { url });

      // Update final result with cached text
      await updateGeneratedLlmsTxt(generationId, {
        status: "completed",
        generatedText: cachedResult.llmstxt,
        fullText: cachedResult.llmstxt_full,
        showFullText: showFullText,
      });

      return {
        success: true,
        data: {
          generatedText: cachedResult.llmstxt,
          fullText: cachedResult.llmstxt_full,
          showFullText: showFullText,
        },
      };
    }

    // If not in cache, proceed with generation
    // First, get all URLs from the map controller
    const mapResult = await getMapResults({
      url,
      teamId,
      plan,
      limit: maxUrls,
      includeSubdomains: false,
      ignoreSitemap: false,
      includeMetadata: true,
    });

    if (!mapResult || !mapResult.links) {
      throw new Error(`Failed to map URLs`);
    }

    _logger.debug("Mapping URLs", mapResult.links);

    const urls = mapResult.links;
    let llmstxt = `# ${url} llms.txt\n\n`;
    let llmsFulltxt = `# ${url} llms-full.txt\n\n`;

    // Scrape each URL
    for (const url of urls) {
      _logger.debug(`Scraping URL: ${url}`);
      const document = await scrapeDocument(
        {
          url,
          teamId,
          plan,
          origin: url,
          timeout: 30000,
          isSingleUrl: true,
        },
        [],
        logger,
        { onlyMainContent: true }
      );

      if (!document) {
        logger.error(`Failed to scrape URL ${url}`);
        continue;
      }

      // Process scraped result
      if (!document.markdown) continue;

      _logger.debug(`Generating description for ${document.metadata?.url}`);

      const completion = await openai.beta.chat.completions.parse({
        model: "gpt-4o-mini",
        messages: [
          {
            role: "user",
            content: `Generate a 9-10 word description and a 3-4 word title of the entire page based on ALL the content one will find on the page for this url: ${document.metadata?.url}. This will help in a user finding the page for its intended purpose. Here is the content: ${document.markdown}`
          }
        ],
        response_format: zodResponseFormat(DescriptionSchema, "description")
      });

      try {
        const parsedResponse = completion.choices[0].message.parsed;
        const description = parsedResponse!.description;
        const title = parsedResponse!.title;

        llmstxt += `- [${title}](${document.metadata?.url}): ${description}\n`;
        llmsFulltxt += `## ${title}\n${document.markdown}\n\n`;

        // Update progress with both generated text and full text
        await updateGeneratedLlmsTxt(generationId, {
          status: "processing",
          generatedText: llmstxt,
          fullText: llmsFulltxt,
        });
      } catch (error) {
        logger.error(`Failed to parse LLM response for ${document.metadata?.url}`, { error });
        continue;
      }
    }

    // After successful generation, save to cache
    await saveLlmsTextToCache(url, llmstxt, llmsFulltxt, maxUrls);

    // Update final result with both generated text and full text
    await updateGeneratedLlmsTxt(generationId, {
      status: "completed",
      generatedText: llmstxt,
      fullText: llmsFulltxt,
      showFullText: showFullText,
    });

    return {
      success: true,
      data: {
        generatedText: llmstxt,
        fullText: llmsFulltxt,
        showFullText: showFullText,
      },
    };

  } catch (error: any) {
    logger.error("Generate LLMs text error", { error });

    await updateGeneratedLlmsTxt(generationId, {
      status: "failed",
      error: error.message || "Unknown error occurred",
    });

    throw error;
  }
}
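
Given the header and entry templates above, a successful run yields a Markdown-style index. A hypothetical two-page result (titles and descriptions invented for illustration):

# https://firecrawl.dev llms.txt

- [Firecrawl Docs](https://firecrawl.dev/docs): Turn websites into clean, LLM-ready markdown and structured data
- [Firecrawl Pricing](https://firecrawl.dev/pricing): Plans, limits, and pricing for the Firecrawl API

The llms-full.txt variant carries the same titles as "##" headings, each followed by the page's full markdown.
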
apps/api/src/lib/generate-llmstxt/generate-llmstxt-supabase.ts (new file, 82 lines)
@@ -0,0 +1,82 @@
import { supabase_service } from "../../services/supabase";
import { logger } from "../logger";
import { normalizeUrlOnlyHostname } from "../canonical-url";

interface LlmsTextCache {
  origin_url: string;
  llmstxt: string;
  llmstxt_full: string;
  max_urls: number;
}

export async function getLlmsTextFromCache(
  url: string,
  maxUrls: number,
): Promise<LlmsTextCache | null> {
  if (process.env.USE_DB_AUTHENTICATION !== "true") {
    return null;
  }

  const originUrl = normalizeUrlOnlyHostname(url);

  try {
    const { data, error } = await supabase_service
      .from("llm_texts")
      .select("*")
      .eq("origin_url", originUrl)
      .gte("max_urls", maxUrls) // Changed to gte since we want cached results with more URLs than requested
      .order("updated_at", { ascending: false })
      .limit(1)
      .single();

    if (error) {
      return null;
    }

    return data;
  } catch (error) {
    logger.error("Failed to fetch LLMs text from cache", { error, originUrl });
    return null;
  }
}

export async function saveLlmsTextToCache(
  url: string,
  llmstxt: string,
  llmstxt_full: string,
  maxUrls: number,
): Promise<void> {
  if (process.env.USE_DB_AUTHENTICATION !== "true") {
    return;
  }

  const originUrl = normalizeUrlOnlyHostname(url);

  try {
    // First check if there's an existing entry with fewer URLs
    const { data: existingData } = await supabase_service
      .from("llm_texts")
      .select("*")
      .eq("origin_url", originUrl)
      .single();

    // Always update the entry for the origin URL
    const { error } = await supabase_service
      .from("llm_texts")
      .update({
        llmstxt,
        llmstxt_full,
        max_urls: maxUrls,
        updated_at: new Date().toISOString(),
      })
      .eq("origin_url", originUrl);

    if (error) {
      logger.error("Error saving LLMs text to cache", { error, originUrl });
    } else {
      logger.debug("Successfully cached LLMs text", { originUrl, maxUrls });
    }
  } catch (error) {
    logger.error("Failed to save LLMs text to cache", { error, originUrl });
  }
}
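
Both helpers are no-ops unless USE_DB_AUTHENTICATION is "true", and a lookup only hits when the llm_texts table has a row for the same normalized hostname whose max_urls is at least the requested count. A hypothetical lookup (sketch, not part of the commit):

// Sketch: a row cached for firecrawl.dev with max_urls = 10 satisfies
// any later request for 10 or fewer URLs against the same hostname.
const hit = await getLlmsTextFromCache("https://firecrawl.dev/docs", 5);
if (hit) {
  console.log(hit.llmstxt); // served from cache, no re-scrape needed
}
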
apps/api/src/routes/v1.ts
@@ -29,6 +29,8 @@ import { creditUsageController } from "../controllers/v1/credit-usage";
 import { BLOCKLISTED_URL_MESSAGE } from "../lib/strings";
 import { searchController } from "../controllers/v1/search";
 import { crawlErrorsController } from "../controllers/v1/crawl-errors";
+import { generateLLMsTextController } from "../controllers/v1/generate-llmstxt";
+import { generateLLMsTextStatusController } from "../controllers/v1/generate-llmstxt-status";
 import { deepResearchController } from "../controllers/v1/deep-research";
 import { deepResearchStatusController } from "../controllers/v1/deep-research-status";
 
@@ -242,6 +244,18 @@ v1Router.get(
   wrap(extractStatusController),
 );
 
+v1Router.post(
+  "/llmstxt",
+  authMiddleware(RateLimiterMode.Extract),
+  wrap(generateLLMsTextController),
+);
+
+v1Router.get(
+  "/llmstxt/:jobId",
+  authMiddleware(RateLimiterMode.ExtractStatus),
+  wrap(generateLLMsTextStatusController),
+);
+
 v1Router.post(
   "/deep-research",
   authMiddleware(RateLimiterMode.Extract),
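
Note that both new routes reuse the extract rate-limiter buckets (RateLimiterMode.Extract for the POST, RateLimiterMode.ExtractStatus for the GET) rather than introducing a dedicated llmstxt mode.
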
apps/api/src/services/queue-service.ts
@@ -7,6 +7,7 @@ let extractQueue: Queue;
 let loggingQueue: Queue;
 let indexQueue: Queue;
 let deepResearchQueue: Queue;
+let generateLlmsTxtQueue: Queue;
 
 export const redisConnection = new IORedis(process.env.REDIS_URL!, {
   maxRetriesPerRequest: null,
@@ -16,6 +17,7 @@ export const scrapeQueueName = "{scrapeQueue}";
 export const extractQueueName = "{extractQueue}";
 export const loggingQueueName = "{loggingQueue}";
 export const indexQueueName = "{indexQueue}";
+export const generateLlmsTxtQueueName = "{generateLlmsTxtQueue}";
 export const deepResearchQueueName = "{deepResearchQueue}";
 
 export function getScrapeQueue() {
@@ -72,6 +74,24 @@ export function getIndexQueue() {
   return indexQueue;
 }
 
+export function getGenerateLlmsTxtQueue() {
+  if (!generateLlmsTxtQueue) {
+    generateLlmsTxtQueue = new Queue(generateLlmsTxtQueueName, {
+      connection: redisConnection,
+      defaultJobOptions: {
+        removeOnComplete: {
+          age: 90000, // 25 hours
+        },
+        removeOnFail: {
+          age: 90000, // 25 hours
+        },
+      },
+    });
+    logger.info("LLMs TXT generation queue created");
+  }
+  return generateLlmsTxtQueue;
+}
+
 export function getDeepResearchQueue() {
   if (!deepResearchQueue) {
     deepResearchQueue = new Queue(deepResearchQueueName, {

apps/api/src/services/queue-worker.ts
@@ -11,6 +11,7 @@ import {
   extractQueueName,
   deepResearchQueueName,
   getIndexQueue,
+  getGenerateLlmsTxtQueue,
 } from "./queue-service";
 import { startWebScraperPipeline } from "../main/runWebScraper";
 import { callWebhook } from "./webhook";
@@ -69,6 +70,8 @@ import { billTeam } from "./billing/credit_billing";
 import { saveCrawlMap } from "./indexing/crawl-maps-index";
 import { updateDeepResearch } from "../lib/deep-research/deep-research-redis";
 import { performDeepResearch } from "../lib/deep-research/deep-research-service";
+import { performGenerateLlmsTxt } from "../lib/generate-llmstxt/generate-llmstxt-service";
+import { updateGeneratedLlmsTxt } from "../lib/generate-llmstxt/generate-llmstxt-redis";
 
 configDotenv();
 
@@ -446,6 +449,76 @@ const processDeepResearchJobInternal = async (
   }
 };
 
+const processGenerateLlmsTxtJobInternal = async (
+  token: string,
+  job: Job & { id: string },
+) => {
+  const logger = _logger.child({
+    module: "generate-llmstxt-worker",
+    method: "processJobInternal",
+    jobId: job.id,
+    generateId: job.data.generateId,
+    teamId: job.data?.teamId ?? undefined,
+  });
+
+  const extendLockInterval = setInterval(async () => {
+    logger.info(`🔄 Worker extending lock on job ${job.id}`);
+    await job.extendLock(token, jobLockExtensionTime);
+  }, jobLockExtendInterval);
+
+  try {
+    const result = await performGenerateLlmsTxt({
+      generationId: job.data.generationId,
+      teamId: job.data.teamId,
+      plan: job.data.plan,
+      url: job.data.request.url,
+      maxUrls: job.data.request.maxUrls,
+      showFullText: job.data.request.showFullText,
+    });
+
+    if (result.success) {
+      await job.moveToCompleted(result, token, false);
+      await updateGeneratedLlmsTxt(job.data.generateId, {
+        status: "completed",
+        generatedText: result.data.generatedText,
+        fullText: result.data.fullText,
+      });
+      return result;
+    } else {
+      const error = new Error("LLMs text generation failed without specific error");
+      await job.moveToFailed(error, token, false);
+      await updateGeneratedLlmsTxt(job.data.generateId, {
+        status: "failed",
+        error: error.message,
+      });
+      return { success: false, error: error.message };
+    }
+  } catch (error) {
+    logger.error(`🚫 Job errored ${job.id} - ${error}`, { error });
+
+    Sentry.captureException(error, {
+      data: {
+        job: job.id,
+      },
+    });
+
+    try {
+      await job.moveToFailed(error, token, false);
+    } catch (e) {
+      logger.error("Failed to move job to failed state in Redis", { error });
+    }
+
+    await updateGeneratedLlmsTxt(job.data.generateId, {
+      status: "failed",
+      error: error.message || "Unknown error occurred",
+    });
+
+    return { success: false, error: error.message || "Unknown error occurred" };
+  } finally {
+    clearInterval(extendLockInterval);
+  }
+};
+
 let isShuttingDown = false;
 
 process.on("SIGINT", () => {
@@ -1170,6 +1243,7 @@ async function processJob(job: Job & { id: string }, token: string) {
   workerFun(getScrapeQueue(), processJobInternal),
   workerFun(getExtractQueue(), processExtractJobInternal),
   workerFun(getDeepResearchQueue(), processDeepResearchJobInternal),
+  workerFun(getGenerateLlmsTxtQueue(), processGenerateLlmsTxtJobInternal),
 ]);
 
 console.log("All workers exited. Waiting for all jobs to finish...");

apps/js-sdk/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "1.18.1",
+  "version": "1.18.2",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",

apps/js-sdk/firecrawl/src/index.ts
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
   summaries: string[];
 }
 
+/**
+ * Parameters for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextParams {
+  /**
+   * Maximum number of URLs to process (1-100)
+   * @default 10
+   */
+  maxUrls?: number;
+  /**
+   * Whether to show the full LLMs-full.txt in the response
+   * @default false
+   */
+  showFullText?: boolean;
+  /**
+   * Experimental flag for streaming
+   */
+  __experimental_stream?: boolean;
+}
+
+/**
+ * Response interface for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextResponse {
+  success: boolean;
+  id: string;
+}
+
+/**
+ * Status response interface for LLMs.txt generation operations.
+ */
+export interface GenerateLLMsTextStatusResponse {
+  success: boolean;
+  data: {
+    llmstxt: string;
+    llmsfulltxt?: string;
+  };
+  status: "processing" | "completed" | "failed";
+  error?: string;
+  expiresAt: string;
+}
+
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1459,6 +1501,118 @@ export default class FirecrawlApp {
     }
     return { success: false, error: "Internal server error." };
   }
 
+  /**
+   * Generates LLMs.txt for a given URL and polls until completion.
+   * @param url - The URL to generate LLMs.txt from.
+   * @param params - Parameters for the LLMs.txt generation operation.
+   * @returns The final generation results.
+   */
+  async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
+    try {
+      const response = await this.asyncGenerateLLMsText(url, params);
+
+      if (!response.success || 'error' in response) {
+        return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
+      }
+
+      if (!response.id) {
+        throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
+      }
+
+      const jobId = response.id;
+      let generationStatus;
+
+      while (true) {
+        generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
+
+        if ('error' in generationStatus && !generationStatus.success) {
+          return generationStatus;
+        }
+
+        if (generationStatus.status === "completed") {
+          return generationStatus;
+        }
+
+        if (generationStatus.status === "failed") {
+          throw new FirecrawlError(
+            `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
+            500
+          );
+        }
+
+        if (generationStatus.status !== "processing") {
+          break;
+        }
+
+        await new Promise(resolve => setTimeout(resolve, 2000));
+      }
+
+      return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500, error.response?.data?.details);
+    }
+  }
+
+  /**
+   * Initiates a LLMs.txt generation operation without polling.
+   * @param url - The URL to generate LLMs.txt from.
+   * @param params - Parameters for the LLMs.txt generation operation.
+   * @returns The response containing the generation job ID.
+   */
+  async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        `${this.apiUrl}/v1/llmstxt`,
+        { url, ...params },
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start LLMs.txt generation");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Checks the status of a LLMs.txt generation operation.
+   * @param id - The ID of the LLMs.txt generation operation.
+   * @returns The current status and results of the generation operation.
+   */
+  async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/llmstxt/${id}`,
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else if (response.status === 404) {
+        throw new FirecrawlError("LLMs.txt generation job not found", 404);
+      } else {
+        this.handleError(response, "check LLMs.txt generation status");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
 }
 
 interface CrawlWatcherEvents {
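
A short usage sketch for the new SDK surface (not part of the commit; the API key is a placeholder):

import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

// Blocking helper: starts the job and polls every 2 seconds until it settles.
const result = await app.generateLLMsText("https://firecrawl.dev", {
  maxUrls: 2,
  showFullText: true,
});

if (result.success && "data" in result) {
  console.log(result.data.llmstxt);     // the generated llms.txt
  console.log(result.data.llmsfulltxt); // present because showFullText is true
}
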
|
@ -33,6 +33,14 @@ class SearchParams(pydantic.BaseModel):
|
|||||||
timeout: Optional[int] = 60000
|
timeout: Optional[int] = 60000
|
||||||
scrapeOptions: Optional[Dict[str, Any]] = None
|
scrapeOptions: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
|
class GenerateLLMsTextParams(pydantic.BaseModel):
|
||||||
|
"""
|
||||||
|
Parameters for the LLMs.txt generation operation.
|
||||||
|
"""
|
||||||
|
maxUrls: Optional[int] = 10
|
||||||
|
showFullText: Optional[bool] = False
|
||||||
|
__experimental_stream: Optional[bool] = None
|
||||||
|
|
||||||
class FirecrawlApp:
|
class FirecrawlApp:
|
||||||
class SearchResponse(pydantic.BaseModel):
|
class SearchResponse(pydantic.BaseModel):
|
||||||
"""
|
"""
|
||||||
@ -756,6 +764,123 @@ class FirecrawlApp:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(str(e), 500)
|
raise ValueError(str(e), 500)
|
||||||
|
|
||||||
|
def generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Generate LLMs.txt for a given URL and poll until completion.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str): The URL to generate LLMs.txt from.
|
||||||
|
params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Parameters for the LLMs.txt generation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: A dictionary containing the generation results. The structure includes:
|
||||||
|
- 'success' (bool): Indicates if the generation was successful.
|
||||||
|
- 'status' (str): The final status of the generation job.
|
||||||
|
- 'data' (Dict): The generated LLMs.txt data.
|
||||||
|
- 'error' (Optional[str]): Error message if the generation failed.
|
||||||
|
- 'expiresAt' (str): ISO 8601 formatted date-time string indicating when the data expires.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If the generation job fails or an error occurs during status checks.
|
||||||
|
"""
|
||||||
|
if params is None:
|
||||||
|
params = {}
|
||||||
|
|
||||||
|
if isinstance(params, dict):
|
||||||
|
generation_params = GenerateLLMsTextParams(**params)
|
||||||
|
else:
|
||||||
|
generation_params = params
|
||||||
|
|
||||||
|
response = self.async_generate_llms_text(url, generation_params)
|
||||||
|
if not response.get('success') or 'id' not in response:
|
||||||
|
return response
|
||||||
|
|
||||||
|
job_id = response['id']
|
||||||
|
while True:
|
||||||
|
status = self.check_generate_llms_text_status(job_id)
|
||||||
|
|
||||||
|
if status['status'] == 'completed':
|
||||||
|
return status
|
||||||
|
elif status['status'] == 'failed':
|
||||||
|
raise Exception(f'LLMs.txt generation failed. Error: {status.get("error")}')
|
||||||
|
elif status['status'] != 'processing':
|
||||||
|
break
|
||||||
|
|
||||||
|
time.sleep(2) # Polling interval
|
||||||
|
|
||||||
|
return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'}
|
||||||
|
|
||||||
|
def async_generate_llms_text(self, url: str, params: Optional[Union[Dict[str, Any], GenerateLLMsTextParams]] = None) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Initiate an asynchronous LLMs.txt generation operation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str): The URL to generate LLMs.txt from.
|
||||||
|
params (Optional[Union[Dict[str, Any], GenerateLLMsTextParams]]): Parameters for the LLMs.txt generation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: A dictionary containing the generation initiation response. The structure includes:
|
||||||
|
- 'success' (bool): Indicates if the generation initiation was successful.
|
||||||
|
- 'id' (str): The unique identifier for the generation job.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If the generation job initiation fails.
|
||||||
|
"""
|
||||||
|
if params is None:
|
||||||
|
params = {}
|
||||||
|
|
||||||
|
if isinstance(params, dict):
|
||||||
|
generation_params = GenerateLLMsTextParams(**params)
|
||||||
|
else:
|
||||||
|
generation_params = params
|
||||||
|
|
||||||
|
headers = self._prepare_headers()
|
||||||
|
json_data = {'url': url, **generation_params.dict(exclude_none=True)}
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception('Failed to parse Firecrawl response as JSON.')
|
||||||
|
else:
|
||||||
|
self._handle_error(response, 'start LLMs.txt generation')
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(str(e))
|
||||||
|
|
||||||
|
return {'success': False, 'error': 'Internal server error'}
|
||||||
|
|
||||||
|
def check_generate_llms_text_status(self, id: str) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Check the status of a LLMs.txt generation operation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
id (str): The ID of the LLMs.txt generation operation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict[str, Any]: The current status and results of the generation operation.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If the status check fails.
|
||||||
|
"""
|
||||||
|
headers = self._prepare_headers()
|
||||||
|
try:
|
||||||
|
response = self._get_request(f'{self.api_url}/v1/llmstxt/{id}', headers)
|
||||||
|
if response.status_code == 200:
|
||||||
|
try:
|
||||||
|
return response.json()
|
||||||
|
except:
|
||||||
|
raise Exception('Failed to parse Firecrawl response as JSON.')
|
||||||
|
elif response.status_code == 404:
|
||||||
|
raise Exception('LLMs.txt generation job not found')
|
||||||
|
else:
|
||||||
|
self._handle_error(response, 'check LLMs.txt generation status')
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(str(e))
|
||||||
|
|
||||||
|
return {'success': False, 'error': 'Internal server error'}
|
||||||
|
|
||||||
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
|
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Prepare the headers for API requests.
|
Prepare the headers for API requests.
|
||||||
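
The Python surface mirrors the JS SDK: generate_llms_text() delegates to async_generate_llms_text() to enqueue the job, then polls check_generate_llms_text_status() every two seconds until the job completes or fails.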