improve: generalize transformations and scripts of runner and preloads into TemplateTransformer (#4487)

This commit is contained in:
Bowen Liang 2024-05-20 15:56:26 +08:00 committed by GitHub
parent c255a20d7c
commit 5f4df34829
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 210 additions and 231 deletions

View File

@ -12,7 +12,7 @@ from config import get_env
from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES, Python3TemplateTransformer from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -187,7 +187,8 @@ class CodeExecutor:
response = response.json() response = response.json()
dependencies = response.get('data', {}).get('dependencies', []) dependencies = response.get('data', {}).get('dependencies', [])
return [ return [
CodeDependency(**dependency) for dependency in dependencies if dependency.get('name') not in PYTHON_STANDARD_PACKAGES CodeDependency(**dependency) for dependency in dependencies
if dependency.get('name') not in Python3TemplateTransformer.get_standard_packages()
] ]
except Exception as e: except Exception as e:
logger.exception(f'Failed to list dependencies: {e}') logger.exception(f'Failed to list dependencies: {e}')

View File

@ -1,58 +1,25 @@
import json from textwrap import dedent
import re
from typing import Optional
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.template_transformer import TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer
NODEJS_RUNNER = """// declare main function here
{{code}}
// execute main function, and return the result
// inputs is a dict, unstructured inputs
output = main({{inputs}})
// convert output to json and print
output = JSON.stringify(output)
result = `<<RESULT>>${output}<<RESULT>>`
console.log(result)
"""
NODEJS_PRELOAD = """"""
class NodeJsTemplateTransformer(TemplateTransformer): class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod @classmethod
def transform_caller(cls, code: str, inputs: dict, def get_runner_script(cls) -> str:
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: runner_script = dedent(
""" f"""
Transform code to python runner // declare main function
:param code: code {cls._code_placeholder}
:param inputs: inputs
:return:
"""
# transform inputs to json string // decode and prepare input object
inputs_str = json.dumps(inputs, indent=4, ensure_ascii=False) var inputs_obj = JSON.parse(atob('{cls._inputs_placeholder}'))
# replace code and inputs // execute main function
runner = NODEJS_RUNNER.replace('{{code}}', code) var output_obj = main(inputs_obj)
runner = runner.replace('{{inputs}}', inputs_str)
return runner, NODEJS_PRELOAD, [] // convert output to json and print
var output_json = JSON.stringify(output_obj)
@classmethod var result = `<<RESULT>>${{output_json}}<<RESULT>>`
def transform_response(cls, response: str) -> dict: console.log(result)
""" """)
Transform response to dict return runner_script
:param response: response
:return:
"""
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return json.loads(result)

View File

@ -1,94 +1,13 @@
import json from textwrap import dedent
import re
from base64 import b64encode
from typing import Optional
from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import PYTHON_STANDARD_PACKAGES
from core.helper.code_executor.template_transformer import TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = """
import jinja2
from json import loads
from base64 import b64decode
template = jinja2.Template('''{{code}}''')
def main(**inputs):
return template.render(**inputs)
# execute main function, and return the result
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**loads(inputs))
result = f'''<<RESULT>>{output}<<RESULT>>'''
print(result)
"""
JINJA2_PRELOAD_TEMPLATE = """{% set fruits = ['Apple'] %}
{{ 'a' }}
{% for fruit in fruits %}
<li>{{ fruit }}</li>
{% endfor %}
{% if fruits|length > 1 %}
1
{% endif %}
{% for i in range(5) %}
{% if i == 3 %}{{ i }}{% else %}{% endif %}
{% endfor %}
{% for i in range(3) %}
{{ i + 1 }}
{% endfor %}
{% macro say_hello() %}a{{ 'b' }}{% endmacro %}
{{ s }}{{ say_hello() }}"""
JINJA2_PRELOAD = f"""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
# prepare jinja2 environment, load template and render before to avoid sandbox issue
template = jinja2.Template('''{JINJA2_PRELOAD_TEMPLATE}''')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
"""
class Jinja2TemplateTransformer(TemplateTransformer): class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod @classmethod
def transform_caller(cls, code: str, inputs: dict, def get_standard_packages(cls) -> set[str]:
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: return {'jinja2'} | Python3TemplateTransformer.get_standard_packages()
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return:
"""
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8')
# transform jinja2 template to python code
runner = PYTHON_RUNNER.replace('{{code}}', code)
runner = runner.replace('{{inputs}}', inputs_str)
if not dependencies:
dependencies = []
# add native packages and jinja2
for package in PYTHON_STANDARD_PACKAGES.union(['jinja2']):
dependencies.append(CodeDependency(name=package, version=''))
# deduplicate
dependencies = list({
dep.name: dep for dep in dependencies if dep.name
}.values())
return runner, JINJA2_PRELOAD, dependencies
@classmethod @classmethod
def transform_response(cls, response: str) -> dict: def transform_response(cls, response: str) -> dict:
@ -97,12 +16,49 @@ class Jinja2TemplateTransformer(TemplateTransformer):
:param response: response :param response: response
:return: :return:
""" """
# extract result
result = re.search(r'<<RESULT>>(.*)<<RESULT>>', response, re.DOTALL)
if not result:
raise ValueError('Failed to parse result')
result = result.group(1)
return { return {
'result': result 'result': cls.extract_result_str_from_response(response)
} }
@classmethod
def get_runner_script(cls) -> str:
    """
    Build the Python runner template used to render a Jinja2 template.

    The returned script still contains the code and inputs placeholders;
    they are substituted later. Once filled in, the script renders the
    template with the base64/JSON-decoded inputs and prints the rendered
    text wrapped in ``<<RESULT>>`` markers.

    :return: runner script with placeholders left in place
    """
    # Doubled braces around ``output`` survive this f-string as single braces,
    # so the generated script formats ``output`` itself at run time.
    template = f"""
        # declare main function
        def main(**inputs):
            import jinja2
            template = jinja2.Template('''{cls._code_placeholder}''')
            return template.render(**inputs)
        import json
        from base64 import b64decode
        # decode and prepare input dict
        inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
        # execute main function
        output = main(**inputs_obj)
        # convert output and print
        result = f'''<<RESULT>>{{output}}<<RESULT>>'''
        print(result)
        """
    return dedent(template)
@classmethod
def get_preload_script(cls) -> str:
    """
    Build the preload script for the Jinja2 runner.

    The script imports jinja2 and renders a trivial ``{{s}}`` template once;
    per the inline comment this is done ahead of time to avoid a sandbox issue.

    :return: preload script source
    """
    # Plain (non f-) string: the braces in '{{s}}' are emitted literally.
    return dedent("""
        import jinja2
        from base64 import b64decode
        def _jinja2_preload_():
            # prepare jinja2 environment, load template and render before to avoid sandbox issue
            template = jinja2.Template('{{s}}')
            template.render(s='a')
        if __name__ == '__main__':
            _jinja2_preload_()
        """)

View File

@ -1,83 +1,51 @@
import json
import re
from base64 import b64encode
from textwrap import dedent from textwrap import dedent
from typing import Optional
from core.helper.code_executor.entities import CodeDependency
from core.helper.code_executor.template_transformer import TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer
PYTHON_RUNNER = dedent("""
# declare main function here
{{code}}
from json import loads, dumps
from base64 import b64decode
# execute main function, and return the result
# inputs is a dict, and it
inputs = b64decode('{{inputs}}').decode('utf-8')
output = main(**json.loads(inputs))
# convert output to json and print
output = dumps(output, indent=4)
result = f'''<<RESULT>>
{output}
<<RESULT>>'''
print(result)
""")
PYTHON_PRELOAD = """"""
PYTHON_STANDARD_PACKAGES = {
'json', 'datetime', 'math', 'random', 're', 'string', 'sys', 'time', 'traceback', 'uuid', 'os', 'base64',
'hashlib', 'hmac', 'binascii', 'collections', 'functools', 'operator', 'itertools', 'uuid',
}
class Python3TemplateTransformer(TemplateTransformer): class Python3TemplateTransformer(TemplateTransformer):
@classmethod @classmethod
def transform_caller(cls, code: str, inputs: dict, def get_standard_packages(cls) -> set[str]:
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: return {
""" 'base64',
Transform code to python runner 'binascii',
:param code: code 'collections',
:param inputs: inputs 'datetime',
:return: 'functools',
""" 'hashlib',
'hmac',
# transform inputs to json string 'itertools',
inputs_str = b64encode(json.dumps(inputs, ensure_ascii=False).encode()).decode('utf-8') 'json',
'math',
# replace code and inputs 'operator',
runner = PYTHON_RUNNER.replace('{{code}}', code) 'os',
runner = runner.replace('{{inputs}}', inputs_str) 'random',
're',
# add standard packages 'string',
if dependencies is None: 'sys',
dependencies = [] 'time',
'traceback',
for package in PYTHON_STANDARD_PACKAGES: 'uuid',
if package not in dependencies: }
dependencies.append(CodeDependency(name=package, version=''))
# deduplicate
dependencies = list({dep.name: dep for dep in dependencies if dep.name}.values())
return runner, PYTHON_PRELOAD, dependencies
@classmethod @classmethod
def transform_response(cls, response: str) -> dict: def get_runner_script(cls) -> str:
""" runner_script = dedent(f"""
Transform response to dict # declare main function
:param response: response {cls._code_placeholder}
:return:
""" import json
# extract result from base64 import b64decode
result = re.search(r'<<RESULT>>(.*?)<<RESULT>>', response, re.DOTALL)
if not result: # decode and prepare input dict
raise ValueError('Failed to parse result') inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
result = result.group(1)
return json.loads(result) # execute main function
output_obj = main(**inputs_obj)
# convert output to json and print
output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
print(result)
""")
return runner_script

View File

@ -1,12 +1,24 @@
import json
import re
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from base64 import b64encode
from typing import Optional from typing import Optional
from pydantic import BaseModel
from core.helper.code_executor.entities import CodeDependency from core.helper.code_executor.entities import CodeDependency
class TemplateTransformer(ABC): class TemplateTransformer(ABC, BaseModel):
_code_placeholder: str = '{{code}}'
_inputs_placeholder: str = '{{inputs}}'
_result_tag: str = '<<RESULT>>'
@classmethod
def get_standard_packages(cls) -> set[str]:
    """
    Names of the packages this transformer always adds as dependencies.

    The base implementation declares none.

    :return: empty set
    """
    no_packages: set[str] = set()
    return no_packages
@classmethod @classmethod
@abstractmethod
def transform_caller(cls, code: str, inputs: dict, def transform_caller(cls, code: str, inputs: dict,
dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]: dependencies: Optional[list[CodeDependency]] = None) -> tuple[str, str, list[CodeDependency]]:
""" """
@ -15,14 +27,61 @@ class TemplateTransformer(ABC):
:param inputs: inputs :param inputs: inputs
:return: runner, preload :return: runner, preload
""" """
pass runner_script = cls.assemble_runner_script(code, inputs)
preload_script = cls.get_preload_script()
packages = dependencies or []
standard_packages = cls.get_standard_packages()
for package in standard_packages:
if package not in packages:
packages.append(CodeDependency(name=package, version=''))
packages = list({dep.name: dep for dep in packages if dep.name}.values())
return runner_script, preload_script, packages
@classmethod
def extract_result_str_from_response(cls, response: str) -> str:
    """
    Extract the text enclosed by the result tags from the raw response.

    The match is greedy and spans newlines, so everything between the first
    and the last occurrence of the tag is returned.

    :param response: raw output of the executed runner script
    :return: text found between the result tags
    :raises ValueError: if the response does not contain a pair of result tags
    """
    # The tag is a literal delimiter, not a regex fragment: escape it so a
    # tag containing regex metacharacters still matches verbatim.
    tag = re.escape(cls._result_tag)
    match = re.search(rf'{tag}(.*){tag}', response, re.DOTALL)
    if match is None:
        raise ValueError('Failed to parse result')
    return match.group(1)
@classmethod @classmethod
@abstractmethod
def transform_response(cls, response: str) -> dict: def transform_response(cls, response: str) -> dict:
""" """
Transform response to dict Transform response to dict
:param response: response :param response: response
:return: :return:
""" """
return json.loads(cls.extract_result_str_from_response(response))
@classmethod
@abstractmethod
def get_runner_script(cls) -> str:
"""
Get runner script
"""
pass pass
@classmethod
def serialize_inputs(cls, inputs: dict) -> str:
    """
    Serialize the inputs to a base64 string.

    The inputs are dumped to JSON without ASCII-escaping, encoded to bytes,
    and base64-encoded.

    :param inputs: inputs
    :return: base64 text of the JSON document
    """
    payload = json.dumps(inputs, ensure_ascii=False)
    return b64encode(payload.encode()).decode('utf-8')
@classmethod
def assemble_runner_script(cls, code: str, inputs: dict) -> str:
    """
    Fill the runner script template with the user code and serialized inputs.

    :param code: code
    :param inputs: inputs
    :return: runner script with both placeholders substituted
    """
    script = cls.get_runner_script()
    # Substitute the inputs first. User code may legitimately contain the
    # inputs placeholder text (e.g. a Jinja2 template using ``{{inputs}}``)
    # and must not be rewritten; the base64 payload produced by the default
    # serialize_inputs cannot contain the code placeholder, so this order
    # is safe.
    script = script.replace(cls._inputs_placeholder, cls.serialize_inputs(inputs))
    return script.replace(cls._code_placeholder, code)
@classmethod
def get_preload_script(cls) -> str:
    """
    Get preload script.

    The base implementation returns an empty script.

    :return: empty string
    """
    empty_preload = ''
    return empty_preload

View File

@ -2,6 +2,7 @@ from textwrap import dedent
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
CODE_LANGUAGE = CodeLanguage.JAVASCRIPT CODE_LANGUAGE = CodeLanguage.JAVASCRIPT
@ -23,7 +24,8 @@ def test_javascript_json():
def test_javascript_with_code_template(): def test_javascript_with_code_template():
result = CodeExecutor.execute_workflow_code_template( result = CodeExecutor.execute_workflow_code_template(
language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(), inputs={'arg1': 'Hello', 'arg2': 'World'}) language=CODE_LANGUAGE, code=JavascriptCodeProvider.get_default_code(),
inputs={'arg1': 'Hello', 'arg2': 'World'})
assert result == {'result': 'HelloWorld'} assert result == {'result': 'HelloWorld'}
@ -32,3 +34,10 @@ def test_javascript_list_default_available_packages():
# no default packages available for javascript # no default packages available for javascript
assert len(packages) == 0 assert len(packages) == 0
def test_javascript_get_runner_script():
    """The Node.js runner template holds each placeholder once and the result tag twice."""
    transformer = NodeJsTemplateTransformer
    script = transformer.get_runner_script()
    expected_occurrences = (
        (transformer._code_placeholder, 1),
        (transformer._inputs_placeholder, 1),
        (transformer._result_tag, 2),
    )
    for marker, times in expected_occurrences:
        assert script.count(marker) == times

View File

@ -1,7 +1,7 @@
import base64 import base64
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.jinja2.jinja2_transformer import JINJA2_PRELOAD, PYTHON_RUNNER from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
CODE_LANGUAGE = CodeLanguage.JINJA2 CODE_LANGUAGE = CodeLanguage.JINJA2
@ -9,8 +9,12 @@ CODE_LANGUAGE = CodeLanguage.JINJA2
def test_jinja2(): def test_jinja2():
template = 'Hello {{template}}' template = 'Hello {{template}}'
inputs = base64.b64encode(b'{"template": "World"}').decode('utf-8') inputs = base64.b64encode(b'{"template": "World"}').decode('utf-8')
code = PYTHON_RUNNER.replace('{{code}}', template).replace('{{inputs}}', inputs) code = (Jinja2TemplateTransformer.get_runner_script()
result = CodeExecutor.execute_code(language=CODE_LANGUAGE, preload=JINJA2_PRELOAD, code=code) .replace(Jinja2TemplateTransformer._code_placeholder, template)
.replace(Jinja2TemplateTransformer._inputs_placeholder, inputs))
result = CodeExecutor.execute_code(language=CODE_LANGUAGE,
preload=Jinja2TemplateTransformer.get_preload_script(),
code=code)
assert result == '<<RESULT>>Hello World<<RESULT>>\n' assert result == '<<RESULT>>Hello World<<RESULT>>\n'
@ -18,3 +22,10 @@ def test_jinja2_with_code_template():
result = CodeExecutor.execute_workflow_code_template( result = CodeExecutor.execute_workflow_code_template(
language=CODE_LANGUAGE, code='Hello {{template}}', inputs={'template': 'World'}) language=CODE_LANGUAGE, code='Hello {{template}}', inputs={'template': 'World'})
assert result == {'result': 'Hello World'} assert result == {'result': 'Hello World'}
def test_jinja2_get_runner_script():
    """The Jinja2 runner template holds each placeholder once and the result tag twice."""
    transformer = Jinja2TemplateTransformer
    script = transformer.get_runner_script()
    expected_occurrences = (
        (transformer._code_placeholder, 1),
        (transformer._inputs_placeholder, 1),
        (transformer._result_tag, 2),
    )
    for marker, times in expected_occurrences:
        assert script.count(marker) == times

View File

@ -3,6 +3,7 @@ from textwrap import dedent
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
CODE_LANGUAGE = CodeLanguage.PYTHON3 CODE_LANGUAGE = CodeLanguage.PYTHON3
@ -35,3 +36,10 @@ def test_python3_list_default_available_packages():
# check JSON serializable # check JSON serializable
assert len(str(json.dumps(packages))) > 0 assert len(str(json.dumps(packages))) > 0
def test_python3_get_runner_script():
    """The Python 3 runner template holds each placeholder once and the result tag twice."""
    transformer = Python3TemplateTransformer
    script = transformer.get_runner_script()
    expected_occurrences = (
        (transformer._code_placeholder, 1),
        (transformer._inputs_placeholder, 1),
        (transformer._result_tag, 2),
    )
    for marker, times in expected_occurrences:
        assert script.count(marker) == times