From 5f4df34829f26ce466f4ee875d8e29b55e38d653 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Mon, 20 May 2024 15:56:26 +0800 Subject: [PATCH] improve: generalize transformations and scripts of runner and preloads into TemplateTransformer (#4487) --- .../helper/code_executor/code_executor.py | 5 +- .../javascript/javascript_transformer.py | 71 +++------ .../jinja2/jinja2_transformer.py | 140 ++++++------------ .../python3/python3_transformer.py | 116 ++++++--------- .../code_executor/template_transformer.py | 73 ++++++++- .../code_executor/test_code_javascript.py | 11 +- .../nodes/code_executor/test_code_jinja2.py | 17 ++- .../nodes/code_executor/test_code_python3.py | 8 + 8 files changed, 210 insertions(+), 231 deletions(-) diff --git a/api/core/helper/code_executor/code_executor.py b/api/core/helper/code_executor/code_executor.py index 7f1c97b110..bc92dde17d 100644 --- a/api/core/helper/code_executor/code_executor.py +++ b/api/core/helper/code_executor/code_executor.py @@ -12,7 +12,7 @@ from config import get_env from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer -from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES, Python3TemplateTransformer +from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer logger = logging.getLogger(__name__) @@ -187,7 +187,8 @@ class CodeExecutor: response = response.json() dependencies = response.get('data', {}).get('dependencies', []) return [ - CodeDependency(**dependency) for dependency in dependencies if dependency.get('name') not in PYTHON_STANDARD_PACKAGES + CodeDependency(**dependency) for dependency in dependencies + if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages() ] except Exception as e: logger.exception(f'Failed to list dependencies: {e}') diff --git a/api/core/helper/code_executor/javascript/javascript_transformer.py b/api/core/helper/code_executor/javascript/javascript_transformer.py index 31900e4599..0a550b7f4d 100644 --- a/api/core/helper/code_executor/javascript/javascript_transformer.py +++ b/api/core/helper/code_executor/javascript/javascript_transformer.py @@ -1,58 +1,25 @@ -import json -import re -from typing import Optional +from textwrap import dedent -from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.template_transformer import TemplateTransformer -NODEJS_RUNNER = """// declare main function here -{{code}} - -// execute main function, and return the result -// inputs is a dict, unstructured inputs -output = main({{inputs}}) - -// convert output to json and print -output = JSON.stringify(output) - -result = `<>${output}<>` - -console.log(result) -""" - -NODEJS_PRELOAD = """""" - class NodeJsTemplateTransformer(TemplateTransformer): @classmethod - def transform_caller(cls, code: str, inputs: dict, - dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: - """ - Transform code to python runner - :param code: code - :param inputs: inputs - :return: - """ - - # transform inputs to json string - inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False) - - # replace code and inputs - runner = NODEJS_RUNNER.replace('{{code}}', code) - runner = runner.replace('{{inputs}}', inputs_str) - - return runner, NODEJS_PRELOAD, [] - - @classmethod - def transform_response(cls, response: str) -> dict: - """ - Transform response to dict - :param response: response - :return: - """ - # extract result - result = re.search(r'<>(.*)<>', response, re.DOTALL) - if not result: - raise ValueError('Failed to parse result') - result = result.group(1) - return json.loads(result) + def get_runner_script(cls) -> str: + runner_script = dedent( + f""" + // declare main function + {cls._code_placeholder} + + // decode and prepare input object + var inputs_obj = JSON.parse(atob('{cls._inputs_placeholder}')) + + // execute main function + var output_obj = main(inputs_obj) + + // convert output to json and print + var output_json = JSON.stringify(output_obj) + var result = `<>${{output_json}}<>` + console.log(result) + """) + return runner_script diff --git a/api/core/helper/code_executor/jinja2/jinja2_transformer.py b/api/core/helper/code_executor/jinja2/jinja2_transformer.py index 01cfe861fa..a8f8095d52 100644 --- a/api/core/helper/code_executor/jinja2/jinja2_transformer.py +++ b/api/core/helper/code_executor/jinja2/jinja2_transformer.py @@ -1,94 +1,13 @@ -import json -import re -from base64 import b64encode -from typing import Optional +from textwrap import dedent -from core.helper.code_executor.entities import CodeDependency -from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES +from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer -PYTHON_RUNNER = """ -import jinja2 -from json import loads -from base64 import b64decode - -template = jinja2.Template('''{{code}}''') - -def main(**inputs): - return template.render(**inputs) - -# execute main function, and return the result -inputs = b64decode('{{inputs}}').decode('utf-8') -output = main(**loads(inputs)) - -result = f'''<>{output}<>''' - -print(result) - -""" - -JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %} -{{ 'a' }} -{% for fruit in fruits %} -
  • {{ fruit }}
  • -{% endfor %} -{% if fruits|length > 1 %} -1 -{% endif %} -{% for i in range(5) %} - {% if i == 3 %}{{ i }}{% else %}{% endif %} -{% endfor %} - {% for i in range(3) %} - {{ i + 1 }} - {% endfor %} -{% macro say_hello() %}a{{ 'b' }}{% endmacro %} -{{ s }}{{ say_hello() }}""" - -JINJA2_PRELOAD = f""" -import jinja2 -from base64 import b64decode - -def _jinja2_preload_(): - # prepare jinja2 environment, load template and render before to avoid sandbox issue - template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''') - template.render(s='a') - -if __name__ == '__main__': - _jinja2_preload_() - -""" - class Jinja2TemplateTransformer(TemplateTransformer): @classmethod - def transform_caller(cls, code: str, inputs: dict, - dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: - """ - Transform code to python runner - :param code: code - :param inputs: inputs - :return: - """ - - inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8') - - # transform jinja2 template to python code - runner = PYTHON_RUNNER.replace('{{code}}', code) - runner = runner.replace('{{inputs}}', inputs_str) - - if not dependencies: - dependencies = [] - - # add native packages and jinja2 - for package in PYTHON_STANDARD_PACKAGES.union(['jinja2']): - dependencies.append(CodeDependency(name=package, version='')) - - # deduplicate - dependencies = list({ - dep.name: dep for dep in dependencies if dep.name - }.values()) - - return runner, JINJA2_PRELOAD, dependencies + def get_standard_packages(cls) -> set[str]: + return {'jinja2'} | Python3TemplateTransformer.get_standard_packages() @classmethod def transform_response(cls, response: str) -> dict: @@ -97,12 +16,49 @@ class Jinja2TemplateTransformer(TemplateTransformer): :param response: response :return: """ - # extract result - result = re.search(r'<>(.*)<>', response, re.DOTALL) - if not result: - raise ValueError('Failed to parse result') - result = result.group(1) - return { - 'result': result + 'result': cls.extract_result_str_from_response(response) } + + @classmethod + def get_runner_script(cls) -> str: + runner_script = dedent(f""" + # declare main function + def main(**inputs): + import jinja2 + template = jinja2.Template('''{cls._code_placeholder}''') + return template.render(**inputs) + + import json + from base64 import b64decode + + # decode and prepare input dict + inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8')) + + # execute main function + output = main(**inputs_obj) + + # convert output and print + result = f'''<>{{output}}<>''' + print(result) + + """) + return runner_script + + @classmethod + def get_preload_script(cls) -> str: + preload_script = dedent(""" + import jinja2 + from base64 import b64decode + + def _jinja2_preload_(): + # prepare jinja2 environment, load template and render before to avoid sandbox issue + template = jinja2.Template('{{s}}') + template.render(s='a') + + if __name__ == '__main__': + _jinja2_preload_() + + """) + + return preload_script diff --git a/api/core/helper/code_executor/python3/python3_transformer.py b/api/core/helper/code_executor/python3/python3_transformer.py index a18c593298..4a5fa35093 100644 --- a/api/core/helper/code_executor/python3/python3_transformer.py +++ b/api/core/helper/code_executor/python3/python3_transformer.py @@ -1,83 +1,51 @@ -import json -import re -from base64 import b64encode from textwrap import dedent -from typing import Optional -from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.template_transformer import TemplateTransformer -PYTHON_RUNNER = dedent(""" -# declare main function here -{{code}} - -from json import loads, dumps -from base64 import b64decode - -# execute main function, and return the result -# inputs is a dict, and it -inputs = b64decode('{{inputs}}').decode('utf-8') -output = main(**json.loads(inputs)) - -# convert output to json and print -output = dumps(output, indent=4) - -result = f'''<> -{output} -<>''' - -print(result) -""") - -PYTHON_PRELOAD = """""" - -PYTHON_STANDARD_PACKAGES = { - 'json', 'datetime', 'math', 'random', 're', 'string', 'sys', 'time', 'traceback', 'uuid', 'os', 'base64', - 'hashlib', 'hmac', 'binascii', 'collections', 'functools', 'operator', 'itertools', 'uuid', -} - class Python3TemplateTransformer(TemplateTransformer): @classmethod - def transform_caller(cls, code: str, inputs: dict, - dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: - """ - Transform code to python runner - :param code: code - :param inputs: inputs - :return: - """ - - # transform inputs to json string - inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8') + def get_standard_packages(cls) -> set[str]: + return { + 'base64', + 'binascii', + 'collections', + 'datetime', + 'functools', + 'hashlib', + 'hmac', + 'itertools', + 'json', + 'math', + 'operator', + 'os', + 'random', + 're', + 'string', + 'sys', + 'time', + 'traceback', + 'uuid', + } - # replace code and inputs - runner = PYTHON_RUNNER.replace('{{code}}', code) - runner = runner.replace('{{inputs}}', inputs_str) - - # add standard packages - if dependencies is None: - dependencies = [] - - for package in PYTHON_STANDARD_PACKAGES: - if package not in dependencies: - dependencies.append(CodeDependency(name=package, version='')) - - # deduplicate - dependencies = list({dep.name: dep for dep in dependencies if dep.name}.values()) - - return runner, PYTHON_PRELOAD, dependencies - @classmethod - def transform_response(cls, response: str) -> dict: - """ - Transform response to dict - :param response: response - :return: - """ - # extract result - result = re.search(r'<>(.*?)<>', response, re.DOTALL) - if not result: - raise ValueError('Failed to parse result') - result = result.group(1) - return json.loads(result) + def get_runner_script(cls) -> str: + runner_script = dedent(f""" + # declare main function + {cls._code_placeholder} + + import json + from base64 import b64decode + + # decode and prepare input dict + inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8')) + + # execute main function + output_obj = main(**inputs_obj) + + # convert output to json and print + output_json = json.dumps(output_obj, indent=4) + result = f'''<>{{output_json}}<>''' + print(result) + """) + return runner_script diff --git a/api/core/helper/code_executor/template_transformer.py b/api/core/helper/code_executor/template_transformer.py index b83d3df30a..39af803f6e 100644 --- a/api/core/helper/code_executor/template_transformer.py +++ b/api/core/helper/code_executor/template_transformer.py @@ -1,13 +1,25 @@ +import json +import re from abc import ABC, abstractmethod +from base64 import b64encode from typing import Optional +from pydantic import BaseModel + from core.helper.code_executor.entities import CodeDependency -class TemplateTransformer(ABC): +class TemplateTransformer(ABC, BaseModel): + _code_placeholder: str = '{{code}}' + _inputs_placeholder: str = '{{inputs}}' + _result_tag: str = '<>' + @classmethod - @abstractmethod - def transform_caller(cls, code: str, inputs: dict, + def get_standard_packages(cls) -> set[str]: + return set() + + @classmethod + def transform_caller(cls, code: str, inputs: dict, dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: """ Transform code to python runner @@ -15,14 +27,61 @@ class TemplateTransformer(ABC): :param inputs: inputs :return: runner, preload """ - pass - + runner_script = cls.assemble_runner_script(code, inputs) + preload_script = cls.get_preload_script() + + packages = dependencies or [] + standard_packages = cls.get_standard_packages() + for package in standard_packages: + if package not in packages: + packages.append(CodeDependency(name=package, version='')) + packages = list({dep.name: dep for dep in packages if dep.name}.values()) + + return runner_script, preload_script, packages + + @classmethod + def extract_result_str_from_response(cls, response: str) -> str: + result = re.search(rf'{cls._result_tag}(.*){cls._result_tag}', response, re.DOTALL) + if not result: + raise ValueError('Failed to parse result') + result = result.group(1) + return result + @classmethod - @abstractmethod def transform_response(cls, response: str) -> dict: """ Transform response to dict :param response: response :return: """ - pass \ No newline at end of file + return json.loads(cls.extract_result_str_from_response(response)) + + @classmethod + @abstractmethod + def get_runner_script(cls) -> str: + """ + Get runner script + """ + pass + + @classmethod + def serialize_inputs(cls, inputs: dict) -> str: + inputs_json_str = json.dumps(inputs, ensure_ascii=False).encode() + input_base64_encoded = b64encode(inputs_json_str).decode('utf-8') + return input_base64_encoded + + @classmethod + def assemble_runner_script(cls, code: str, inputs: dict) -> str: + # assemble runner script + script = cls.get_runner_script() + script = script.replace(cls._code_placeholder, code) + inputs_str = cls.serialize_inputs(inputs) + script = script.replace(cls._inputs_placeholder, inputs_str) + return script + + @classmethod + def get_preload_script(cls) -> str: + """ + Get preload script + """ + return '' diff --git a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_javascript.py b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_javascript.py index 1ba9331a3e..2d798eb9c2 100644 --- a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_javascript.py +++ b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_javascript.py @@ -2,6 +2,7 @@ from textwrap import dedent from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider +from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer CODE_LANGUAGE = CodeLanguage.JAVASCRIPT @@ -23,7 +24,8 @@ def test_javascript_json(): def test_javascript_with_code_template(): result = CodeExecutor.execute_workflow_code_template( - language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(), inputs={'arg1': 'Hello', 'arg2': 'World'}) + language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(), + inputs={'arg1': 'Hello', 'arg2': 'World'}) assert result == {'result': 'HelloWorld'} @@ -32,3 +34,10 @@ def test_javascript_list_default_available_packages(): # no default packages available for javascript assert len(packages) == 0 + + +def test_javascript_get_runner_script(): + runner_script = NodeJsTemplateTransformer.get_runner_script() + assert runner_script.count(NodeJsTemplateTransformer._code_placeholder) == 1 + assert runner_script.count(NodeJsTemplateTransformer._inputs_placeholder) == 1 + assert runner_script.count(NodeJsTemplateTransformer._result_tag) == 2 diff --git a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_jinja2.py b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_jinja2.py index 070531e2d4..425f4cbdd4 100644 --- a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_jinja2.py +++ b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_jinja2.py @@ -1,7 +1,7 @@ import base64 from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage -from core.helper.code_executor.jinja2.jinja2_transformer import JINJA2_PRELOAD, PYTHON_RUNNER +from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer CODE_LANGUAGE = CodeLanguage.JINJA2 @@ -9,8 +9,12 @@ CODE_LANGUAGE = CodeLanguage.JINJA2 def test_jinja2(): template = 'Hello {{template}}' inputs = base64.b64encode(b'{"template": "World"}').decode('utf-8') - code = PYTHON_RUNNER.replace('{{code}}', template).replace('{{inputs}}', inputs) - result = CodeExecutor.execute_code(language=CODE_LANGUAGE, preload=JINJA2_PRELOAD, code=code) + code = (Jinja2TemplateTransformer.get_runner_script() + .replace(Jinja2TemplateTransformer._code_placeholder, template) + .replace(Jinja2TemplateTransformer._inputs_placeholder, inputs)) + result = CodeExecutor.execute_code(language=CODE_LANGUAGE, + preload=Jinja2TemplateTransformer.get_preload_script(), + code=code) assert result == '<>Hello World<>\n' @@ -18,3 +22,10 @@ def test_jinja2_with_code_template(): result = CodeExecutor.execute_workflow_code_template( language=CODE_LANGUAGE, code='Hello {{template}}', inputs={'template': 'World'}) assert result == {'result': 'Hello World'} + + +def test_jinja2_get_runner_script(): + runner_script = Jinja2TemplateTransformer.get_runner_script() + assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1 + assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1 + assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2 diff --git a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_python3.py b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_python3.py index a2af591f2e..d265011d4c 100644 --- a/api/tests/integration_tests/workflow/nodes/code_executor/test_code_python3.py +++ b/api/tests/integration_tests/workflow/nodes/code_executor/test_code_python3.py @@ -3,6 +3,7 @@ from textwrap import dedent from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider +from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer CODE_LANGUAGE = CodeLanguage.PYTHON3 @@ -35,3 +36,10 @@ def test_python3_list_default_available_packages(): # check JSON serializable assert len(str(json.dumps(packages))) > 0 + + +def test_python3_get_runner_script(): + runner_script = Python3TemplateTransformer.get_runner_script() + assert runner_script.count(Python3TemplateTransformer._code_placeholder) == 1 + assert runner_script.count(Python3TemplateTransformer._inputs_placeholder) == 1 + assert runner_script.count(Python3TemplateTransformer._result_tag) == 2