import { logger as _logger } from "../logger";
import { updateGeneratedLlmsTxt } from "./generate-llmstxt-redis";
import { getMapResults } from "../../controllers/v1/map";
import { z } from "zod";
import { scrapeDocument } from "../extract/document-scraper";
import {
  getLlmsTextFromCache,
  saveLlmsTextToCache,
} from "./generate-llmstxt-supabase";
import { billTeam } from "../../services/billing/credit_billing";
import { logJob } from "../../services/logging/log_job";
import { getModel } from "../generic-ai";
import { generateCompletions } from "../../scraper/scrapeURL/transformers/llmExtract";
import { CostTracking } from "../extract/extraction-service";

interface GenerateLLMsTextServiceOptions {
  generationId: string;
  teamId: string;
  url: string;
  maxUrls: number;
  showFullText: boolean;
  subId?: string;
}

const descriptionSchema = z.object({
  description: z.string(),
  title: z.string(),
});

// Helper function to remove page separators
function removePageSeparators(text: string): string {
  return text.replace(/<\|firecrawl-page-\d+-lllmstxt\|>\n/g, "");
}

// Helper function to limit pages in full text
function limitPages(fullText: string, maxPages: number): string {
  const pages = fullText.split(/<\|firecrawl-page-\d+-lllmstxt\|>\n/);
  // The first element is the header, so keep one extra slot for it.
  // Note that split/join already drops the separator markers themselves;
  // removePageSeparators is a defensive second pass.
  const limitedPages = pages.slice(0, maxPages + 1);
  return limitedPages.join("");
}

// Helper function to limit llms.txt entries
function limitLlmsTxtEntries(llmstxt: string, maxEntries: number): string {
  const lines = llmstxt.split("\n");
  // Find the header line (starts with #)
  const headerIndex = lines.findIndex(line => line.startsWith("#"));
  if (headerIndex === -1) return llmstxt;
  // Keep the header and only the requested number of link entries
  const header = lines[headerIndex];
  const entries = lines.filter(line => line.startsWith("- ["));
  const limitedEntries = entries.slice(0, maxEntries);
  // Reconstruct the text
  return `${header}\n\n${limitedEntries.join("\n")}`;
}
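/*
 * A worked sketch of the marker format the helpers above operate on.
 * The sample values are illustrative, not real output:
 *
 *   const sample =
 *     "# https://example.com llms-full.txt\n\n" +
 *     "<|firecrawl-page-1-lllmstxt|>\n## Page One\nBody one\n\n" +
 *     "<|firecrawl-page-2-lllmstxt|>\n## Page Two\nBody two\n\n";
 *
 *   limitPages(sample, 1);
 *   // => "# https://example.com llms-full.txt\n\n## Page One\nBody one\n\n"
 *
 *   limitLlmsTxtEntries(
 *     "# https://example.com llms.txt\n\n" +
 *       "- [Page One](https://example.com/one): First page\n" +
 *       "- [Page Two](https://example.com/two): Second page\n",
 *     1,
 *   );
 *   // => "# https://example.com llms.txt\n\n- [Page One](https://example.com/one): First page"
 */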
/**
 * Generates llms.txt and llms-full.txt for a site: checks the cache first,
 * otherwise maps the site's URLs, scrapes them in batches of 10, asks an LLM
 * for a short title and description per page, then caches, logs, and bills.
 */
export async function performGenerateLlmsTxt(
  options: GenerateLLMsTextServiceOptions,
) {
  const { generationId, teamId, url, maxUrls = 100, showFullText, subId } =
    options;
  const startTime = Date.now();
  const logger = _logger.child({
    module: "generate-llmstxt",
    method: "performGenerateLlmsTxt",
    generationId,
    teamId,
  });
  const costTracking = new CostTracking();

  try {
    // Enforce the hard cap on how many URLs we will process
    const effectiveMaxUrls = Math.min(maxUrls, 5000);

    // Check cache first
    const cachedResult = await getLlmsTextFromCache(url, effectiveMaxUrls);
    if (cachedResult) {
      logger.info("Found cached LLMs text", { url });

      // Limit pages and remove separators before returning
      const limitedFullText = limitPages(
        cachedResult.llmstxt_full,
        effectiveMaxUrls,
      );
      const cleanFullText = removePageSeparators(limitedFullText);

      // Limit llms.txt entries to match maxUrls
      const limitedLlmsTxt = limitLlmsTxtEntries(
        cachedResult.llmstxt,
        effectiveMaxUrls,
      );

      // Update final result with cached text
      await updateGeneratedLlmsTxt(generationId, {
        status: "completed",
        generatedText: limitedLlmsTxt,
        fullText: cleanFullText,
        showFullText,
      });

      return {
        success: true,
        data: {
          generatedText: limitedLlmsTxt,
          fullText: cleanFullText,
          showFullText,
        },
      };
    }

    // Not in cache: get all URLs from the map controller first
    const mapResult = await getMapResults({
      url,
      teamId,
      limit: effectiveMaxUrls,
      includeSubdomains: false,
      ignoreSitemap: false,
      includeMetadata: true,
    });

    if (!mapResult || !mapResult.links) {
      throw new Error("Failed to map URLs");
    }

    logger.debug("Mapped URLs", { links: mapResult.links });

    const urls = mapResult.links;
    let llmstxt = `# ${url} llms.txt\n\n`;
    let llmsFulltxt = `# ${url} llms-full.txt\n\n`;

    // Process URLs in batches of 10
    for (let i = 0; i < urls.length; i += 10) {
      const batch = urls.slice(i, i + 10);
      const batchResults = await Promise.all(
        batch.map(async (url) => {
          logger.debug(`Scraping URL: ${url}`);
          try {
            const document = await scrapeDocument(
              {
                url,
                teamId,
                origin: url,
                timeout: 30000,
                isSingleUrl: true,
              },
              [],
              logger,
              { onlyMainContent: true },
            );

            if (!document || !document.markdown) {
              logger.error(`Failed to scrape URL ${url}`);
              return null;
            }

            logger.debug(
              `Generating description for ${document.metadata?.url}`,
            );

            const { extract } = await generateCompletions({
              logger,
              model: getModel("gpt-4o-mini", "openai"),
              options: {
                systemPrompt: "",
                mode: "llm",
                schema: descriptionSchema,
                prompt: `Generate a 9-10 word description and a 3-4 word title of the entire page based on ALL the content one will find on the page for this url: ${document.metadata?.url}. This will help a user find the page for its intended purpose.`,
              },
              markdown: document.markdown,
              costTrackingOptions: {
                costTracking,
                metadata: {
                  module: "generate-llmstxt",
                  method: "generateDescription",
                },
              },
            });

            return {
              title: extract.title,
              description: extract.description,
              url: document.metadata?.url,
              markdown: document.markdown,
            };
          } catch (error) {
            logger.error(`Failed to process URL ${url}`, { error });
            return null;
          }
        }),
      );

      // Append successful results from this batch, numbering pages by their
      // overall position so the markers stay stable across batches
      for (let j = 0; j < batchResults.length; j++) {
        const result = batchResults[j];
        if (!result) continue;
        llmstxt += `- [${result.title}](${result.url}): ${result.description}\n`;
        llmsFulltxt += `<|firecrawl-page-${i + j + 1}-lllmstxt|>\n## ${result.title}\n${result.markdown}\n\n`;
      }

      // Update progress after each batch
      await updateGeneratedLlmsTxt(generationId, {
        status: "processing",
        generatedText: llmstxt,
        fullText: removePageSeparators(llmsFulltxt),
      });
    }

    // After successful generation, save to cache
    await saveLlmsTextToCache(url, llmstxt, llmsFulltxt, effectiveMaxUrls);

    // Limit pages and remove separators before the final update
    const limitedFullText = limitPages(llmsFulltxt, effectiveMaxUrls);
    const cleanFullText = removePageSeparators(limitedFullText);

    // Update final result with both generated text and full text
    await updateGeneratedLlmsTxt(generationId, {
      status: "completed",
      generatedText: llmstxt,
      fullText: cleanFullText,
      showFullText,
    });

    // Log job with token usage and sources
    await logJob({
      job_id: generationId,
      success: true,
      message: "LLMs text generation completed",
      num_docs: urls.length,
      docs: [{ llmstxt: llmstxt, llmsfulltxt: llmsFulltxt }],
      time_taken: (Date.now() - startTime) / 1000,
      team_id: teamId,
      mode: "llmstxt",
      url: url,
      scrapeOptions: options,
      origin: "api",
      num_tokens: 0,
      tokens_billed: 0,
      sources: {},
      cost_tracking: costTracking,
    });

    // Bill team for usage (fire-and-forget; billing failures are only logged)
    billTeam(teamId, subId, urls.length, logger).catch((error) => {
      logger.error(`Failed to bill team ${teamId} for ${urls.length} urls`, {
        teamId,
        count: urls.length,
        error,
      });
    });

    return {
      success: true,
      data: {
        generatedText: llmstxt,
        fullText: cleanFullText,
        showFullText,
      },
    };
  } catch (error: any) {
    logger.error("Generate LLMs text error", { error });
    await updateGeneratedLlmsTxt(generationId, {
      status: "failed",
      error: error.message || "Unknown error occurred",
    });
    throw error;
  }
}
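/*
 * Example usage (a sketch; the IDs and URL below are hypothetical
 * placeholders, not values from this codebase):
 *
 *   const result = await performGenerateLlmsTxt({
 *     generationId: "generation-uuid", // hypothetical job ID
 *     teamId: "team-uuid",             // hypothetical team ID
 *     url: "https://example.com",
 *     maxUrls: 50,
 *     showFullText: true,
 *   });
 *   if (result.success) {
 *     console.log(result.data.generatedText); // llms.txt-style link index
 *     console.log(result.data.fullText);      // concatenated page contents
 *   }
 */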