Update generate-llmstxt-service.ts

2025-08-20 12:39:07 +08:00 · 2025-02-19 15:50:59 -03:00 · 2025-02-19 15:50:59 -03:00 · d4cf2269ed
commit d4cf2269ed
parent f5de803a9d
1 changed file with 62 additions and 51 deletions
--- a/apps/api/src/lib/generate-llmstxt/generate-llmstxt-service.ts
+++ b/apps/api/src/lib/generate-llmstxt/generate-llmstxt-service.ts
@@ -53,6 +53,7 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
        showFullText: showFullText,
      });
      return {
        success: true,
        data: {
@@ -86,9 +87,13 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
    let llmsFulltxt = `# ${url} llms-full.txt\n\n`;
-    // Scrape each URL
+    // Process URLs in batches of 10
-    for (const url of urls) {
+    for (let i = 0; i < urls.length; i += 10) {
      const batch = urls.slice(i, i + 10);
      const batchResults = await Promise.all(batch.map(async (url) => {
        _logger.debug(`Scraping URL: ${url}`);
        try {
          const document = await scrapeDocument(
            {
              url,
@@ -103,14 +108,11 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
            { onlyMainContent: true }
          );
-      if (!document) {
+          if (!document || !document.markdown) {
            logger.error(`Failed to scrape URL ${url}`);
-        continue;
+            return null;
          }
      // Process scraped result
      if (!document.markdown) continue;
          _logger.debug(`Generating description for ${document.metadata?.url}`);
          const completion = await openai.beta.chat.completions.parse({
@@ -124,24 +126,33 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
            response_format: zodResponseFormat(DescriptionSchema, "description")
          });
      try {
          const parsedResponse = completion.choices[0].message.parsed;
-        const description = parsedResponse!.description;
+          return {
-        const title = parsedResponse!.title;
+            title: parsedResponse!.title,
            description: parsedResponse!.description,
            url: document.metadata?.url,
            markdown: document.markdown
          };
        } catch (error) {
          logger.error(`Failed to process URL ${url}`, { error });
          return null;
        }
      }));
-        llmstxt += `- [${title}](${document.metadata?.url}): ${description}\n`;
+      // Process successful results from batch
-        llmsFulltxt += `## ${title}\n${document.markdown}\n\n`;
+      for (const result of batchResults) {
        if (!result) continue;
-        // Update progress with both generated text and full text
+        llmstxt += `- [${result.title}](${result.url}): ${result.description}\n`;
        llmsFulltxt += `## ${result.title}\n${result.markdown}\n\n`;
      }
      // Update progress after each batch
      await updateGeneratedLlmsTxt(generationId, {
        status: "processing",
        generatedText: llmstxt,
        fullText: llmsFulltxt,
      });
      } catch (error) {
        logger.error(`Failed to parse LLM response for ${document.metadata?.url}`, { error });
        continue;
      }
    }
    // After successful generation, save to cache