From 0d2f93c773c7a7669ad00ff6de95aa61a5e83b3f Mon Sep 17 00:00:00 2001
From: He Tao <taohe@bytedance.com>
Date: Mon, 21 Apr 2025 16:43:06 +0800
Subject: [PATCH] feat: add ppt generation feat

---
 .vscode/launch.json                 |  11 +++
 README.md                           |   4 ++
 src/config/agents.py                |   1 +
 src/ppt/graph/builder.py            |  30 ++++++++
 src/ppt/graph/ppt_composer_node.py  |  33 +++++++++
 src/ppt/graph/ppt_generator_node.py |  25 +++++++
 src/ppt/graph/state.py              |  20 ++++++
 src/prompts/ppt_composer.md         | 107 ++++++++++++++++++++++++++++
 src/server/app.py                   |  21 ++++++
 src/server/chat_request.py          |   4 ++
 10 files changed, 256 insertions(+)
 create mode 100644 src/ppt/graph/builder.py
 create mode 100644 src/ppt/graph/ppt_composer_node.py
 create mode 100644 src/ppt/graph/ppt_generator_node.py
 create mode 100644 src/ppt/graph/state.py
 create mode 100644 src/prompts/ppt_composer.md

diff --git a/.vscode/launch.json b/.vscode/launch.json
index ae95928..b4b1d6b 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -45,5 +45,16 @@
                 "PYTHONPATH": "${workspaceFolder}"
             }
         },
+        {
+            "name": "Python: graph.py",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/src/ppt/graph/builder.py",
+            "console": "integratedTerminal",
+            "justMyCode": false,
+            "env": {
+                "PYTHONPATH": "${workspaceFolder}"
+            }
+        },
     ]
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index 24e7d66..5f17d81 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,10 @@ cp .env.example .env
 # Gemini: https://ai.google.dev/gemini-api/docs/openai
 cp conf.yaml.example conf.yaml
 
+# Install marp-cli for PPT generation (Homebrew shown; see the link below for npm and other options)
+# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager
+brew install marp-cli
+
 # Run the project
 uv run main.py
 ```
diff --git a/src/config/agents.py b/src/config/agents.py
index 5d26c12..4454c56 100644
--- a/src/config/agents.py
+++ b/src/config/agents.py
@@ -14,4 +14,5 @@ AGENT_LLM_MAP: dict[str, LLMType] = {
     "coder": "basic",
     "reporter": "basic",
     "podcast_script_writer": "basic",
+    "ppt_composer": "basic",
 }
diff --git a/src/ppt/graph/builder.py b/src/ppt/graph/builder.py
new file mode 100644
index 0000000..2bf40e5
--- /dev/null
+++ b/src/ppt/graph/builder.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+
+from langgraph.graph import END, START, StateGraph
+
+from src.ppt.graph.ppt_composer_node import ppt_composer_node
+from src.ppt.graph.ppt_generator_node import ppt_generator_node
+from src.ppt.graph.state import PPTState
+
+
+def build_graph():
+    """Assemble the linear ppt workflow and return it compiled."""
+    # Pipeline shape: START -> ppt_composer -> ppt_generator -> END
+    graph = StateGraph(PPTState)
+    graph.add_node("ppt_composer", ppt_composer_node)
+    graph.add_node("ppt_generator", ppt_generator_node)
+    graph.add_edge(START, "ppt_composer")
+    graph.add_edge("ppt_composer", "ppt_generator")
+    graph.add_edge("ppt_generator", END)
+    return graph.compile()
+
+
+if __name__ == "__main__":
+    # Manual smoke run: feed an example report through the ppt workflow.
+    from dotenv import load_dotenv
+
+    load_dotenv()
+    with open("examples/nanjing_tangbao.md", encoding="utf-8") as f:
+        report_content = f.read()
+    final_state = build_graph().invoke({"input": report_content})
diff --git a/src/ppt/graph/ppt_composer_node.py b/src/ppt/graph/ppt_composer_node.py
new file mode 100644
index 0000000..fe3207d
--- /dev/null
+++ b/src/ppt/graph/ppt_composer_node.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+
+import logging
+import os
+import uuid
+
+from langchain.schema import HumanMessage, SystemMessage
+
+from src.config.agents import AGENT_LLM_MAP
+from src.llms.llm import get_llm_by_type
+from src.prompts.template import get_prompt_template
+
+from .state import PPTState
+
+logger = logging.getLogger(__name__)
+
+
+def ppt_composer_node(state: PPTState):
+    """Ask the LLM for Markdown slides and write them to a uniquely named .md file.
+
+    Returns state updates: ``ppt_content`` (the Markdown text) and ``ppt_file_path``.
+    """
+    logger.info("Generating ppt content...")
+    model = get_llm_by_type(AGENT_LLM_MAP["ppt_composer"])
+    system = SystemMessage(content=get_prompt_template("ppt_composer"))
+    response = model.invoke([system, HumanMessage(content=state["input"])])
+    logger.info(f"ppt_content: {response}")
+    temp_ppt_file_path = os.path.join(os.getcwd(), f"ppt_content_{uuid.uuid4()}.md")
+    # Explicit utf-8: the locale default encoding may not handle non-ASCII slide text
+    with open(temp_ppt_file_path, "w", encoding="utf-8") as f:
+        f.write(response.content)
+    return {"ppt_content": response.content, "ppt_file_path": temp_ppt_file_path}
diff --git a/src/ppt/graph/ppt_generator_node.py b/src/ppt/graph/ppt_generator_node.py
new file mode 100644
index 0000000..52a8158
--- /dev/null
+++ b/src/ppt/graph/ppt_generator_node.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+
+import logging
+import os
+import subprocess
+import uuid
+
+from src.ppt.graph.state import PPTState
+
+logger = logging.getLogger(__name__)
+
+
+def ppt_generator_node(state: PPTState):
+    """Convert the Markdown at ``state["ppt_file_path"]`` to .pptx via marp-cli."""
+    logger.info("Generating ppt file...")
+    source_path = state["ppt_file_path"]
+    output_path = os.path.join(os.getcwd(), f"generated_ppt_{uuid.uuid4()}.pptx")
+    try:
+        # https://github.com/marp-team/marp-cli -- check=True surfaces marp failures
+        subprocess.run(["marp", source_path, "-o", output_path], check=True)
+    finally:
+        os.remove(source_path)  # temp Markdown is removed whether or not marp succeeded
+    logger.info(f"generated_file_path: {output_path}")
+    return {"generated_file_path": output_path}
diff --git a/src/ppt/graph/state.py b/src/ppt/graph/state.py
new file mode 100644
index 0000000..0d6fd22
--- /dev/null
+++ b/src/ppt/graph/state.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# SPDX-License-Identifier: MIT
+
+from typing import Optional
+
+from langgraph.graph import MessagesState
+
+
+class PPTState(MessagesState):
+    """State passed between the ppt_composer and ppt_generator graph nodes."""
+
+    # Input: report text that ppt_composer_node sends to the LLM as the human message
+    input: str = ""
+
+    # Output: path of the generated .pptx file, set by ppt_generator_node
+    generated_file_path: str = ""
+
+    # Assets: intermediate results produced by ppt_composer_node
+    ppt_content: str = ""
+    ppt_file_path: str = ""
diff --git a/src/prompts/ppt_composer.md b/src/prompts/ppt_composer.md
new file mode 100644
index 0000000..9b5170a
--- /dev/null
+++ b/src/prompts/ppt_composer.md
@@ -0,0 +1,107 @@
+# Professional Presentation (PPT) Markdown Assistant
+
+## Purpose
+You are a professional PPT presentation creation assistant who transforms user requirements into a clear, focused Markdown-formatted presentation text. Your output should start directly with the presentation content, without any introductory phrases or explanations.
+
+## Markdown PPT Formatting Guidelines
+
+### Title and Structure
+- Use `#` for the title slide (typically one slide)
+- Use `##` for slide titles
+- Use `###` for subtitles (if needed)
+- Use horizontal rule `---` to separate slides
+
+### Content Formatting
+- Use unordered lists (`*` or `-`) for key points
+- Use ordered lists (`1.`, `2.`) for sequential steps
+- Separate paragraphs with blank lines
+- Use code blocks with triple backticks
+- IMPORTANT: When including images, ONLY use the actual image URLs from the source content. DO NOT create fictional image URLs or placeholders like 'example.com'
+
+## Processing Workflow
+
+### 1. Understand User Requirements
+- Carefully read all provided information
+- Note:
+  * Presentation topic
+  * Target audience
+  * Key messages
+  * Presentation duration
+  * Specific style or format requirements
+
+### 2. Extract Core Content
+- Identify the most important points
+- Remember: the PPT supports the speech; it does not replace it
+
+### 3. Organize Content Structure
+Typical structure includes:
+- Title Slide
+- Introduction/Agenda
+- Body (multiple sections)
+- Summary/Conclusion
+- Optional Q&A section
+
+### 4. Create Markdown Presentation
+- Ensure each slide focuses on one main point
+- Use concise, powerful language
+- Emphasize points with bullet points
+- Use appropriate title hierarchy
+
+### 5. Review and Optimize
+- Check for completeness
+- Refine text formatting
+- Ensure readability
+
+## Important Guidelines
+- Do not guess or add information not provided
+- Ask clarifying questions if needed
+- Simplify detailed or lengthy information
+- Highlight Markdown advantages (easy editing, version control)
+- ONLY use images that are explicitly provided in the source content
+- NEVER create fictional image URLs or placeholders
+- If you include an image, use the exact URL from the source content
+
+## Input Processing Rules
+- Carefully analyze user input
+- Extract key presentation elements
+- Transform input into structured Markdown format
+- Maintain clarity and logical flow
+
+## Example User Input
+"Help me create a presentation about 'How to Improve Team Collaboration Efficiency' for project managers. Cover: defining team goals, establishing communication mechanisms, using collaboration tools like Slack and Microsoft Teams, and regular reviews and feedback. Presentation length is about 15 minutes."
+
+## Expected Output Format
+
+// IMPORTANT: Your response should start directly with the content below, with no introductory text
+
+# Presentation Title
+
+---
+
+## Agenda
+
+- Key Point 1
+- Key Point 2
+- Key Point 3
+
+---
+
+## Detailed Slide Content
+
+- Specific bullet points
+- Explanatory details
+- Key takeaways
+
+![Image Title](https://actual-source-url.com/image.jpg)
+
+---
+
+
+## Response Guidelines
+- Provide a complete, ready-to-use Markdown presentation
+- Ensure professional and clear formatting
+- Adapt to user's specific context and requirements
+- IMPORTANT: Start your response directly with the presentation content. DO NOT include any introductory phrases like "Here's a presentation about..." or "Here's a professional Markdown-formatted presentation..."
+- Begin your response with the title using a single # heading
+- For images, ONLY use the exact image URLs found in the source content. DO NOT invent or create fictional image URLs
+- If the source content contains images, incorporate them in your presentation using the exact same URLs
\ No newline at end of file
diff --git a/src/server/app.py b/src/server/app.py
index 19d9185..a2f4ccf 100644
--- a/src/server/app.py
+++ b/src/server/app.py
@@ -16,10 +16,12 @@ from langgraph.types import Command
 
 from src.graph.builder import build_graph
 from src.podcast.graph.builder import build_graph as build_podcast_graph
+from src.ppt.graph.builder import build_graph as build_ppt_graph
 from src.server.chat_request import (
     ChatMessage,
     ChatRequest,
     GeneratePodcastRequest,
+    GeneratePPTRequest,
     TTSRequest,
 )
 from src.tools import VolcengineTTS
@@ -216,3 +218,22 @@ async def generate_podcast(request: GeneratePodcastRequest):
     except Exception as e:
         logger.exception(f"Error occurred during podcast generation: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/api/ppt/generate")
+async def generate_ppt(request: GeneratePPTRequest):
+    """Run the PPT workflow on ``request.content`` and return the .pptx bytes."""
+    import os  # local import keeps this handler self-contained
+    try:
+        final_state = build_ppt_graph().invoke({"input": request.content})
+        generated_file_path = final_state["generated_file_path"]
+        with open(generated_file_path, "rb") as f:
+            ppt_bytes = f.read()
+        os.remove(generated_file_path)  # already in memory; avoid piling up files in cwd
+        return Response(
+            content=ppt_bytes,
+            media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+        )
+    except Exception as e:
+        logger.exception(f"Error occurred during ppt generation: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/src/server/chat_request.py b/src/server/chat_request.py
index 1226804..970f52d 100644
--- a/src/server/chat_request.py
+++ b/src/server/chat_request.py
@@ -64,3 +64,7 @@ class TTSRequest(BaseModel):
 
 class GeneratePodcastRequest(BaseModel):
     content: str = Field(..., description="The content of the podcast")
+
+
+class GeneratePPTRequest(BaseModel):  # request body of POST /api/ppt/generate
+    content: str = Field(..., description="The content of the ppt")