From 0d2f93c773c7a7669ad00ff6de95aa61a5e83b3f Mon Sep 17 00:00:00 2001 From: He Tao Date: Mon, 21 Apr 2025 16:43:06 +0800 Subject: [PATCH] feat: add ppt generation feat --- .vscode/launch.json | 11 +++ README.md | 4 ++ src/config/agents.py | 1 + src/ppt/graph/builder.py | 30 ++++++++ src/ppt/graph/ppt_composer_node.py | 33 +++++++++ src/ppt/graph/ppt_generator_node.py | 25 +++++++ src/ppt/graph/state.py | 20 ++++++ src/prompts/ppt_composer.md | 107 ++++++++++++++++++++++++++++ src/server/app.py | 21 ++++++ src/server/chat_request.py | 4 ++ 10 files changed, 256 insertions(+) create mode 100644 src/ppt/graph/builder.py create mode 100644 src/ppt/graph/ppt_composer_node.py create mode 100644 src/ppt/graph/ppt_generator_node.py create mode 100644 src/ppt/graph/state.py create mode 100644 src/prompts/ppt_composer.md diff --git a/.vscode/launch.json b/.vscode/launch.json index ae95928..b4b1d6b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -45,5 +45,16 @@ "PYTHONPATH": "${workspaceFolder}" } }, + { + "name": "Python: graph.py", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/ppt/graph/builder.py", + "console": "integratedTerminal", + "justMyCode": false, + "env": { + "PYTHONPATH": "${workspaceFolder}" + } + }, ] } \ No newline at end of file diff --git a/README.md b/README.md index 24e7d66..5f17d81 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,10 @@ cp .env.example .env # Gemini: https://ai.google.dev/gemini-api/docs/openai cp conf.yaml.example conf.yaml +# Install marp for ppt generation +# https://github.com/marp-team/marp-cli?tab=readme-ov-file#use-package-manager +brew install marp-cli + # Run the project uv run main.py ``` diff --git a/src/config/agents.py b/src/config/agents.py index 5d26c12..4454c56 100644 --- a/src/config/agents.py +++ b/src/config/agents.py @@ -14,4 +14,5 @@ AGENT_LLM_MAP: dict[str, LLMType] = { "coder": "basic", "reporter": "basic", "podcast_script_writer": "basic", + "ppt_composer": 
"basic", } diff --git a/src/ppt/graph/builder.py b/src/ppt/graph/builder.py new file mode 100644 index 0000000..2bf40e5 --- /dev/null +++ b/src/ppt/graph/builder.py @@ -0,0 +1,30 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT + +from langgraph.graph import END, START, StateGraph + +from src.ppt.graph.ppt_composer_node import ppt_composer_node +from src.ppt.graph.ppt_generator_node import ppt_generator_node +from src.ppt.graph.state import PPTState + + +def build_graph(): + """Build and return the ppt workflow graph.""" + # build state graph + builder = StateGraph(PPTState) + builder.add_node("ppt_composer", ppt_composer_node) + builder.add_node("ppt_generator", ppt_generator_node) + builder.add_edge(START, "ppt_composer") + builder.add_edge("ppt_composer", "ppt_generator") + builder.add_edge("ppt_generator", END) + return builder.compile() + + +if __name__ == "__main__": + from dotenv import load_dotenv + + load_dotenv() + + with open("examples/nanjing_tangbao.md", encoding="utf-8") as f: + report_content = f.read() + final_state = build_graph().invoke({"input": report_content}) diff --git a/src/ppt/graph/ppt_composer_node.py b/src/ppt/graph/ppt_composer_node.py new file mode 100644 index 0000000..fe3207d --- /dev/null +++ b/src/ppt/graph/ppt_composer_node.py @@ -0,0 +1,33 @@ +# Copyright (c) 2025 Bytedance Ltd.
and/or its affiliates +# SPDX-License-Identifier: MIT + +import logging +import os +import uuid + +from langchain.schema import HumanMessage, SystemMessage + +from src.config.agents import AGENT_LLM_MAP +from src.llms.llm import get_llm_by_type +from src.prompts.template import get_prompt_template + +from .state import PPTState + +logger = logging.getLogger(__name__) + + +def ppt_composer_node(state: PPTState): + logger.info("Generating ppt content...") + model = get_llm_by_type(AGENT_LLM_MAP["ppt_composer"]) + ppt_content = model.invoke( + [ + SystemMessage(content=get_prompt_template("ppt_composer")), + HumanMessage(content=state["input"]), + ], + ) + logger.info(f"ppt_content: {ppt_content}") + # save the ppt content in a temp file (UTF-8, so non-ASCII slides survive any locale) + temp_ppt_file_path = os.path.join(os.getcwd(), f"ppt_content_{uuid.uuid4()}.md") + with open(temp_ppt_file_path, "w", encoding="utf-8") as f: + f.write(ppt_content.content) + return {"ppt_content": ppt_content.content, "ppt_file_path": temp_ppt_file_path} diff --git a/src/ppt/graph/ppt_generator_node.py b/src/ppt/graph/ppt_generator_node.py new file mode 100644 index 0000000..52a8158 --- /dev/null +++ b/src/ppt/graph/ppt_generator_node.py @@ -0,0 +1,25 @@ +# Copyright (c) 2025 Bytedance Ltd.
and/or its affiliates +# SPDX-License-Identifier: MIT + +import logging +import os +import subprocess +import uuid + +from src.ppt.graph.state import PPTState + +logger = logging.getLogger(__name__) + + +def ppt_generator_node(state: PPTState): + logger.info("Generating ppt file...") + # use marp cli to generate ppt file + # https://github.com/marp-team/marp-cli?tab=readme-ov-file + generated_file_path = os.path.join( + os.getcwd(), f"generated_ppt_{uuid.uuid4()}.pptx" + ) + proc = subprocess.run(["marp", state["ppt_file_path"], "-o", generated_file_path]) + os.remove(state["ppt_file_path"]) # temp Markdown is no longer needed + proc.check_returncode() # raise CalledProcessError if marp exited non-zero + logger.info(f"generated_file_path: {generated_file_path}") + return {"generated_file_path": generated_file_path} diff --git a/src/ppt/graph/state.py b/src/ppt/graph/state.py new file mode 100644 index 0000000..0d6fd22 --- /dev/null +++ b/src/ppt/graph/state.py @@ -0,0 +1,20 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT + +from typing import Optional + +from langgraph.graph import MessagesState + + +class PPTState(MessagesState): + """State for the ppt generation.""" + + # Input + input: str = "" + + # Output + generated_file_path: str = "" + + # Assets + ppt_content: str = "" + ppt_file_path: str = "" diff --git a/src/prompts/ppt_composer.md b/src/prompts/ppt_composer.md new file mode 100644 index 0000000..9b5170a --- /dev/null +++ b/src/prompts/ppt_composer.md @@ -0,0 +1,107 @@ +# Professional Presentation (PPT) Markdown Assistant + +## Purpose +You are a professional PPT presentation creation assistant who transforms user requirements into a clear, focused Markdown-formatted presentation text. Your output should start directly with the presentation content, without any introductory phrases or explanations.
+ +## Markdown PPT Formatting Guidelines + +### Title and Structure +- Use `#` for the title slide (typically one slide) +- Use `##` for slide titles +- Use `###` for subtitles (if needed) +- Use horizontal rule `---` to separate slides + +### Content Formatting +- Use unordered lists (`*` or `-`) for key points +- Use ordered lists (`1.`, `2.`) for sequential steps +- Separate paragraphs with blank lines +- Use code blocks with triple backticks +- IMPORTANT: When including images, ONLY use the actual image URLs from the source content. DO NOT create fictional image URLs or placeholders like 'example.com' + +## Processing Workflow + +### 1. Understand User Requirements +- Carefully read all provided information +- Note: + * Presentation topic + * Target audience + * Key messages + * Presentation duration + * Specific style or format requirements + +### 2. Extract Core Content +- Identify the most important points +- Remember: the PPT supports the speech; it does not replace it + +### 3. Organize Content Structure +Typical structure includes: +- Title Slide +- Introduction/Agenda +- Body (multiple sections) +- Summary/Conclusion +- Optional Q&A section + +### 4. Create Markdown Presentation +- Ensure each slide focuses on one main point +- Use concise, powerful language +- Emphasize points with bullet points +- Use appropriate title hierarchy + +### 5.
Review and Optimize +- Check for completeness +- Refine text formatting +- Ensure readability + +## Important Guidelines +- Do not guess or add information not provided +- Ask clarifying questions if needed +- Simplify detailed or lengthy information +- Highlight Markdown advantages (easy editing, version control) +- ONLY use images that are explicitly provided in the source content +- NEVER create fictional image URLs or placeholders +- If you include an image, use the exact URL from the source content + +## Input Processing Rules +- Carefully analyze user input +- Extract key presentation elements +- Transform input into structured Markdown format +- Maintain clarity and logical flow + +## Example User Input +"Help me create a presentation about 'How to Improve Team Collaboration Efficiency' for project managers. Cover: defining team goals, establishing communication mechanisms, using collaboration tools like Slack and Microsoft Teams, and regular reviews and feedback. Presentation length is about 15 minutes." + +## Expected Output Format + +// IMPORTANT: Your response should start directly with the content below, with no introductory text + +# Presentation Title + +--- + +## Agenda + +- Key Point 1 +- Key Point 2 +- Key Point 3 + +--- + +## Detailed Slide Content + +- Specific bullet points +- Explanatory details +- Key takeaways + +![Image Title](https://actual-source-url.com/image.jpg) + +--- + + +## Response Guidelines +- Provide a complete, ready-to-use Markdown presentation +- Ensure professional and clear formatting +- Adapt to user's specific context and requirements +- IMPORTANT: Start your response directly with the presentation content. DO NOT include any introductory phrases like "Here's a presentation about..." or "Here's a professional Markdown-formatted presentation..." +- Begin your response with the title using a single # heading +- For images, ONLY use the exact image URLs found in the source content. 
DO NOT invent or create fictional image URLs +- If the source content contains images, incorporate them in your presentation using the exact same URLs \ No newline at end of file diff --git a/src/server/app.py b/src/server/app.py index 19d9185..a2f4ccf 100644 --- a/src/server/app.py +++ b/src/server/app.py @@ -16,10 +16,12 @@ from langgraph.types import Command from src.graph.builder import build_graph from src.podcast.graph.builder import build_graph as build_podcast_graph +from src.ppt.graph.builder import build_graph as build_ppt_graph from src.server.chat_request import ( ChatMessage, ChatRequest, GeneratePodcastRequest, + GeneratePPTRequest, TTSRequest, ) from src.tools import VolcengineTTS @@ -216,3 +218,22 @@ async def generate_podcast(request: GeneratePodcastRequest): except Exception as e: logger.exception(f"Error occurred during podcast generation: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/ppt/generate") +async def generate_ppt(request: GeneratePPTRequest): + """Run the ppt workflow on the request content and return the .pptx bytes.""" + try: + report_content = request.content + workflow = build_ppt_graph() + final_state = workflow.invoke({"input": report_content}) + generated_file_path = final_state["generated_file_path"] + with open(generated_file_path, "rb") as f: + ppt_bytes = f.read() + return Response( + content=ppt_bytes, + media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", + ) + except Exception as e: + logger.exception(f"Error occurred during ppt generation: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) diff --git a/src/server/chat_request.py b/src/server/chat_request.py index 1226804..970f52d 100644 --- a/src/server/chat_request.py +++ b/src/server/chat_request.py @@ -64,3 +64,7 @@ class TTSRequest(BaseModel): class GeneratePodcastRequest(BaseModel): content: str = Field(..., description="The content of the podcast") + + +class GeneratePPTRequest(BaseModel): + content: str = Field(..., description="The
content of the ppt")